Ver código fonte

Merge a numpy compatible with v1.x (#305)

* Add numpy

* Some refactor
Anurag Bhat 1 ano atrás
pai
commit
b2bbd09263
100 arquivos alterados com 73378 adições e 0 exclusões
  1. 49 0
      3rd/numpy/README.md
  2. 1037 0
      3rd/numpy/include/numpy.hpp
  3. 347 0
      3rd/numpy/include/xtensor/xaccessible.hpp
  4. 370 0
      3rd/numpy/include/xtensor/xaccumulator.hpp
  5. 921 0
      3rd/numpy/include/xtensor/xadapt.hpp
  6. 667 0
      3rd/numpy/include/xtensor/xarray.hpp
  7. 1367 0
      3rd/numpy/include/xtensor/xassign.hpp
  8. 349 0
      3rd/numpy/include/xtensor/xaxis_iterator.hpp
  9. 367 0
      3rd/numpy/include/xtensor/xaxis_slice_iterator.hpp
  10. 533 0
      3rd/numpy/include/xtensor/xblockwise_reducer.hpp
  11. 503 0
      3rd/numpy/include/xtensor/xblockwise_reducer_functors.hpp
  12. 482 0
      3rd/numpy/include/xtensor/xbroadcast.hpp
  13. 1282 0
      3rd/numpy/include/xtensor/xbuffer_adaptor.hpp
  14. 1261 0
      3rd/numpy/include/xtensor/xbuilder.hpp
  15. 686 0
      3rd/numpy/include/xtensor/xchunked_array.hpp
  16. 378 0
      3rd/numpy/include/xtensor/xchunked_assign.hpp
  17. 295 0
      3rd/numpy/include/xtensor/xchunked_view.hpp
  18. 264 0
      3rd/numpy/include/xtensor/xcomplex.hpp
  19. 1192 0
      3rd/numpy/include/xtensor/xcontainer.hpp
  20. 808 0
      3rd/numpy/include/xtensor/xdynamic_view.hpp
  21. 177 0
      3rd/numpy/include/xtensor/xeval.hpp
  22. 373 0
      3rd/numpy/include/xtensor/xexception.hpp
  23. 769 0
      3rd/numpy/include/xtensor/xexpression.hpp
  24. 273 0
      3rd/numpy/include/xtensor/xexpression_holder.hpp
  25. 198 0
      3rd/numpy/include/xtensor/xexpression_traits.hpp
  26. 991 0
      3rd/numpy/include/xtensor/xfixed.hpp
  27. 1193 0
      3rd/numpy/include/xtensor/xfunction.hpp
  28. 1649 0
      3rd/numpy/include/xtensor/xfunctor_view.hpp
  29. 528 0
      3rd/numpy/include/xtensor/xgenerator.hpp
  30. 614 0
      3rd/numpy/include/xtensor/xhistogram.hpp
  31. 852 0
      3rd/numpy/include/xtensor/xindex_view.hpp
  32. 142 0
      3rd/numpy/include/xtensor/xinfo.hpp
  33. 832 0
      3rd/numpy/include/xtensor/xio.hpp
  34. 1369 0
      3rd/numpy/include/xtensor/xiterable.hpp
  35. 1303 0
      3rd/numpy/include/xtensor/xiterator.hpp
  36. 104 0
      3rd/numpy/include/xtensor/xlayout.hpp
  37. 1145 0
      3rd/numpy/include/xtensor/xmanipulation.hpp
  38. 676 0
      3rd/numpy/include/xtensor/xmasked_view.hpp
  39. 3329 0
      3rd/numpy/include/xtensor/xmath.hpp
  40. 431 0
      3rd/numpy/include/xtensor/xmime.hpp
  41. 131 0
      3rd/numpy/include/xtensor/xmultiindex_iterator.hpp
  42. 230 0
      3rd/numpy/include/xtensor/xnoalias.hpp
  43. 661 0
      3rd/numpy/include/xtensor/xnorm.hpp
  44. 803 0
      3rd/numpy/include/xtensor/xnpy.hpp
  45. 95 0
      3rd/numpy/include/xtensor/xoffset_view.hpp
  46. 997 0
      3rd/numpy/include/xtensor/xoperation.hpp
  47. 323 0
      3rd/numpy/include/xtensor/xpad.hpp
  48. 1007 0
      3rd/numpy/include/xtensor/xrandom.hpp
  49. 1903 0
      3rd/numpy/include/xtensor/xreducer.hpp
  50. 705 0
      3rd/numpy/include/xtensor/xrepeat.hpp
  51. 1098 0
      3rd/numpy/include/xtensor/xscalar.hpp
  52. 796 0
      3rd/numpy/include/xtensor/xsemantic.hpp
  53. 213 0
      3rd/numpy/include/xtensor/xset_operation.hpp
  54. 578 0
      3rd/numpy/include/xtensor/xshape.hpp
  55. 1671 0
      3rd/numpy/include/xtensor/xslice.hpp
  56. 1353 0
      3rd/numpy/include/xtensor/xsort.hpp
  57. 1984 0
      3rd/numpy/include/xtensor/xstorage.hpp
  58. 921 0
      3rd/numpy/include/xtensor/xstrided_view.hpp
  59. 970 0
      3rd/numpy/include/xtensor/xstrided_view_base.hpp
  60. 916 0
      3rd/numpy/include/xtensor/xstrides.hpp
  61. 983 0
      3rd/numpy/include/xtensor/xtensor.hpp
  62. 137 0
      3rd/numpy/include/xtensor/xtensor_config.hpp
  63. 209 0
      3rd/numpy/include/xtensor/xtensor_forward.hpp
  64. 333 0
      3rd/numpy/include/xtensor/xtensor_simd.hpp
  65. 1134 0
      3rd/numpy/include/xtensor/xutils.hpp
  66. 104 0
      3rd/numpy/include/xtensor/xvectorize.hpp
  67. 2317 0
      3rd/numpy/include/xtensor/xview.hpp
  68. 283 0
      3rd/numpy/include/xtensor/xview_utils.hpp
  69. 477 0
      3rd/numpy/include/xtl/xany.hpp
  70. 77 0
      3rd/numpy/include/xtl/xbase64.hpp
  71. 2436 0
      3rd/numpy/include/xtl/xbasic_fixed_string.hpp
  72. 435 0
      3rd/numpy/include/xtl/xclosure.hpp
  73. 179 0
      3rd/numpy/include/xtl/xcompare.hpp
  74. 1361 0
      3rd/numpy/include/xtl/xcomplex.hpp
  75. 578 0
      3rd/numpy/include/xtl/xcomplex_sequence.hpp
  76. 1356 0
      3rd/numpy/include/xtl/xdynamic_bitset.hpp
  77. 44 0
      3rd/numpy/include/xtl/xfunctional.hpp
  78. 41 0
      3rd/numpy/include/xtl/xhalf_float.hpp
  79. 4036 0
      3rd/numpy/include/xtl/xhalf_float_impl.hpp
  80. 208 0
      3rd/numpy/include/xtl/xhash.hpp
  81. 73 0
      3rd/numpy/include/xtl/xhierarchy_generator.hpp
  82. 422 0
      3rd/numpy/include/xtl/xiterator_base.hpp
  83. 546 0
      3rd/numpy/include/xtl/xmasked_value.hpp
  84. 41 0
      3rd/numpy/include/xtl/xmasked_value_meta.hpp
  85. 640 0
      3rd/numpy/include/xtl/xmeta_utils.hpp
  86. 422 0
      3rd/numpy/include/xtl/xmultimethods.hpp
  87. 1331 0
      3rd/numpy/include/xtl/xoptional.hpp
  88. 141 0
      3rd/numpy/include/xtl/xoptional_meta.hpp
  89. 622 0
      3rd/numpy/include/xtl/xoptional_sequence.hpp
  90. 42 0
      3rd/numpy/include/xtl/xplatform.hpp
  91. 48 0
      3rd/numpy/include/xtl/xproxy_wrapper.hpp
  92. 215 0
      3rd/numpy/include/xtl/xsequence.hpp
  93. 21 0
      3rd/numpy/include/xtl/xspan.hpp
  94. 779 0
      3rd/numpy/include/xtl/xspan_impl.hpp
  95. 125 0
      3rd/numpy/include/xtl/xsystem.hpp
  96. 44 0
      3rd/numpy/include/xtl/xtl_config.hpp
  97. 458 0
      3rd/numpy/include/xtl/xtype_traits.hpp
  98. 206 0
      3rd/numpy/include/xtl/xvariant.hpp
  99. 2818 0
      3rd/numpy/include/xtl/xvariant_impl.hpp
  100. 195 0
      3rd/numpy/include/xtl/xvisitor.hpp

+ 49 - 0
3rd/numpy/README.md

@@ -0,0 +1,49 @@
+# numpy
+
+### How to run **numpy** module programs with **gsoc-2024-dev** [pybind11](https://github.com/pocketpy/gsoc-2024-dev/tree/main/pybind11)
+
+1. Prepare a Python source file containing the numpy operations you want to run. \
+\
+ For example : let's try out numpy [arange](https://numpy.org/doc/stable/reference/generated/numpy.arange.html) function in `test_numpy.py`
+```py
+  import numpy_bindings as np
+  
+  def test_arange(n):
+      a = np.arange(n)
+      print(a.sum())
+  
+  test_arange(100)
+  ```
+
+2. Read the script and execute it in `test_numpy.cpp`. 
+```cpp
+  #include <pybind11/embed.h>
+  #include <fstream>
+  #include <sstream>
+  #include <string>
+  
+  namespace py = pybind11;
+  using namespace pybind11;
+  
+  int main() {
+      py::scoped_interpreter guard{};
+      std::ifstream file("test_numpy.py");
+      std::stringstream buffer;
+      buffer << file.rdbuf();
+      std::string script = buffer.str();
+      py::exec(script);
+  
+      return 0;
+  }
+```
+
+3. Build the project at root to generate the executable at `build/gsoc2024`.
+```sh
+  cmake -B build
+  cmake --build build
+```
+4. Now run the executable to get the output. 
+```sh
+  |base| gsoc-2024-dev ±|main ✗|→ build/gsoc2024 
+  4950
+```

+ 1037 - 0
3rd/numpy/include/numpy.hpp

@@ -0,0 +1,1037 @@
+#pragma once
+
+#include <any>
+#include <cstdint>
+#include <complex>
+#include <chrono>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+// Suppress xtensor warnings if SUPPRESS_XTENSOR_WARNINGS is set
+#ifdef SUPPRESS_XTENSOR_WARNINGS
+    #ifdef _MSC_VER
+        #pragma warning(push, 0)
+    #else
+        #pragma GCC diagnostic push
+        #pragma GCC diagnostic ignored "-Wall"
+        #pragma GCC diagnostic ignored "-Wextra"
+        #pragma GCC system_header
+    #endif
+#endif
+
+#include <xtensor/xarray.hpp>
+#include <xtensor/xio.hpp>
+#include <xtensor/xmath.hpp>
+#include <xtensor/xrandom.hpp>
+#include <xtensor/xsort.hpp>
+#include <xtensor/xview.hpp>
+
+#ifdef SUPPRESS_XTENSOR_WARNINGS
+    #ifdef _MSC_VER
+        #pragma warning(pop)
+    #else
+        #pragma GCC diagnostic pop
+    #endif
+#endif
+
+namespace pkpy {
+
+// Type aliases
+using int8 = int8_t;
+using int16 = int16_t;
+using int32 = int32_t;
+using int64 = int64_t;
+using uint8 = uint8_t;
+using uint16 = uint16_t;
+using uint32 = uint32_t;
+using uint64 = uint64_t;
+using int_ = int64;
+using float32 = float;
+using float64 = double;
+using float_ = float64;
+using bool_ = bool;
+using complex64 = std::complex<float32>;
+using complex128 = std::complex<float64>;
+using complex_ = complex128;
+using string = std::string;
+
// Compile-time map from a C++ scalar type to its numpy dtype name string.
// The unspecialized template reports "unknown"; concrete specializations
// are generated by REGISTER_DTYPE below and consumed by ndarray::dtype().
template <typename T>
struct dtype_traits {
    constexpr const static char* name = "unknown";
};

// Generates the dtype_traits<Type> specialization exposing Name.
#define REGISTER_DTYPE(Type, Name)                                                                                     \
    template <>                                                                                                        \
    struct dtype_traits<Type> {                                                                                        \
        static constexpr const char* name = Name;                                                                      \
    };

REGISTER_DTYPE(int8_t, "int8");
REGISTER_DTYPE(int16_t, "int16");
REGISTER_DTYPE(int32_t, "int32");
REGISTER_DTYPE(int64_t, "int64");
REGISTER_DTYPE(uint8_t, "uint8");
REGISTER_DTYPE(uint16_t, "uint16");
REGISTER_DTYPE(uint32_t, "uint32");
REGISTER_DTYPE(uint64_t, "uint64");
REGISTER_DTYPE(float, "float32");
REGISTER_DTYPE(float_, "float64");
REGISTER_DTYPE(bool_, "bool");
REGISTER_DTYPE(std::complex<float32>, "complex64");
REGISTER_DTYPE(std::complex<float64>, "complex128");
+
+using _Dtype = std::string;
+using _ShapeLike = std::vector<int>;
+
+namespace numpy {
+
// Forward declaration so the trait below can name ndarray.
template <typename T>
class ndarray;

// Variable-template trait: true only for ndarray<T>. Used to SFINAE the
// scalar overloads of the arithmetic operators out of consideration when
// the right-hand side is itself an ndarray.
template <typename T>
constexpr inline auto is_ndarray_v = false;

template <typename T>
constexpr inline auto is_ndarray_v<ndarray<T>> = true;
+
+template <typename T>
+class ndarray {
+public:
+    // Constructor for xtensor xarray
+    ndarray() = default;
+
+    ndarray(const T scalar) : _array(scalar) {}
+
+    ndarray(const xt::xarray<T>& arr) : _array(arr) {}
+
+    // Constructor for mutli-dimensional array
+    ndarray(std::initializer_list<T> init_list) : _array(init_list) {}
+
+    ndarray(std::initializer_list<std::initializer_list<T>> init_list) : _array(init_list) {}
+
+    ndarray(std::initializer_list<std::initializer_list<std::initializer_list<T>>> init_list) : _array(init_list) {}
+
+    ndarray(std::initializer_list<std::initializer_list<std::initializer_list<std::initializer_list<T>>>> init_list) :
+        _array(init_list) {}
+
+    ndarray(std::initializer_list<
+            std::initializer_list<std::initializer_list<std::initializer_list<std::initializer_list<T>>>>> init_list) :
+        _array(init_list) {}
+
+    // Accessor function for _array
+    const xt::xarray<T>& get_array() const { return _array; }
+
+    // Properties
+    _Dtype dtype() const { return dtype_traits<T>::name; }
+
+    int ndim() const { return static_cast<int>(_array.dimension()); }
+
+    int size() const { return static_cast<int>(_array.size()); }
+
+    _ShapeLike shape() const { return _ShapeLike(_array.shape().begin(), _array.shape().end()); }
+
+    // Dunder Methods
+    template <typename U>
+    auto operator== (const ndarray<U>& other) const {
+        return ndarray<bool_>(xt::equal(_array, other.get_array()));
+    }
+
+    template <typename U>
+    auto operator!= (const ndarray<U>& other) const {
+        return ndarray<bool_>(xt::not_equal(_array, other.get_array()));
+    }
+
+    template <typename U>
+    auto operator+ (const ndarray<U>& other) const {
+        using result_type = std::common_type_t<T, U>;
+        xt::xarray<result_type> result = xt::cast<result_type>(_array) + xt::cast<result_type>(other.get_array());
+        return ndarray<result_type>(result);
+    }
+
+    template <typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+    auto operator+ (const U& other) const {
+        return binary_operator_add_impl<U>(other);
+    }
+
+    template <typename U>
+    auto binary_operator_add_impl(const U& other) const {
+        if constexpr(std::is_same_v<U, float_>) {
+            xt::xarray<float_> result = xt::cast<float_>(_array) + other;
+            return ndarray<float_>(result);
+        } else {
+            using result_type = std::common_type_t<T, U>;
+            xt::xarray<result_type> result = xt::cast<result_type>(_array) + other;
+            return ndarray<result_type>(result);
+        }
+    }
+
+    template <typename U>
+    auto operator- (const ndarray<U>& other) const {
+        using result_type = std::common_type_t<T, U>;
+        xt::xarray<result_type> result = xt::cast<result_type>(_array) - xt::cast<result_type>(other.get_array());
+        return ndarray<result_type>(result);
+    }
+
+    template <typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+    auto operator- (const U& other) const {
+        return binary_operator_sub_impl<U>(other);
+    }
+
+    template <typename U>
+    auto binary_operator_sub_impl(const U& other) const {
+        if constexpr(std::is_same_v<U, float_>) {
+            xt::xarray<float_> result = xt::cast<float_>(_array) - other;
+            return ndarray<float_>(result);
+        } else {
+            using result_type = std::common_type_t<T, U>;
+            xt::xarray<result_type> result = xt::cast<result_type>(_array) - other;
+            return ndarray<result_type>(result);
+        }
+    }
+
+    template <typename U>
+    auto operator* (const ndarray<U>& other) const {
+        using result_type = std::common_type_t<T, U>;
+        xt::xarray<result_type> result = xt::cast<result_type>(_array) * xt::cast<result_type>(other.get_array());
+        return ndarray<result_type>(result);
+    }
+
+    template <typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+    auto operator* (const U& other) const {
+        return binary_operator_mul_impl<U>(other);
+    }
+
+    template <typename U>
+    auto binary_operator_mul_impl(const U& other) const {
+        if constexpr(std::is_same_v<U, float_>) {
+            xt::xarray<float_> result = xt::cast<float_>(_array) * other;
+            return ndarray<float_>(result);
+        } else {
+            using result_type = std::common_type_t<T, U>;
+            xt::xarray<result_type> result = xt::cast<result_type>(_array) * other;
+            return ndarray<result_type>(result);
+        }
+    }
+
+    template <typename U>
+    auto operator/ (const ndarray<U>& other) const {
+        using result_type = std::conditional_t<std::is_same_v<T, bool> || std::is_same_v<U, bool>, float64, std::common_type_t<T, U>>;
+        xt::xarray<result_type> result = xt::cast<result_type>(_array) / xt::cast<result_type>(other.get_array());
+        return ndarray<result_type>(result);
+    }
+
+    template <typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+    auto operator/ (const U& other) const {
+        return binary_operator_truediv_impl<U>(other);
+    }
+
+    template <typename U>
+    auto binary_operator_truediv_impl(const U& other) const {
+        xt::xarray<float_> result = xt::cast<float_>(_array) / static_cast<float_>(other);
+        return ndarray<float_>(result);
+    }
+
+    template <typename U>
+    auto pow(const ndarray<U>& other) const {
+        using result_type = std::common_type_t<T, U>;
+        xt::xarray<result_type> result =
+            xt::pow(xt::cast<result_type>(_array), xt::cast<result_type>(other.get_array()));
+        return ndarray<result_type>(result);
+    }
+
+    template <typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+    auto pow(const U& other) const {
+        return pow_impl<U>(other);
+    }
+
+    template <typename U>
+    auto pow_impl(const U& other) const {
+        xt::xarray<float_> result = xt::pow(xt::cast<float_>(_array), other);
+        return ndarray<float_>(result);
+    }
+
+    template <typename U>
+    ndarray operator& (const ndarray<U>& other) const {
+        using result_type = std::common_type_t<T, U>;
+        xt::xarray<result_type> result = xt::cast<result_type>(_array) & xt::cast<result_type>(other.get_array());
+        return ndarray(result);
+    }
+
+    template <typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+    ndarray operator& (const U& other) const {
+        xt::xarray<T> result = _array & static_cast<T>(other);
+        return ndarray(result);
+    }
+
+    template <typename U>
+    ndarray operator| (const ndarray<U>& other) const {
+        using result_type = std::common_type_t<T, U>;
+        xt::xarray<result_type> result = xt::cast<result_type>(_array) | xt::cast<result_type>(other.get_array());
+        return ndarray(result);
+    }
+
+    template <typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+    ndarray operator| (const U& other) const {
+        xt::xarray<T> result = _array | static_cast<T>(other);
+        return ndarray(result);
+    }
+
+    template <typename U>
+    ndarray operator^ (const ndarray<U>& other) const {
+        using result_type = std::common_type_t<T, U>;
+        xt::xarray<result_type> result = xt::cast<result_type>(_array) ^ xt::cast<result_type>(other.get_array());
+        return ndarray(result);
+    }
+
+    template <typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+    ndarray operator^ (const U& other) const {
+        xt::xarray<T> result = _array ^ static_cast<T>(other);
+        return ndarray(result);
+    }
+
+    ndarray operator~() const { return ndarray(~(_array)); }
+
+    ndarray operator!() const { return ndarray(!(_array)); }
+
+    T operator() (int index) const { return _array(index); }
+
+    ndarray operator[] (int index) const { return ndarray(xt::view(_array, index, xt::all())); }
+
+    ndarray operator[] (const std::vector<int>& indices) const { return ndarray(xt::view(_array, xt::keep(indices))); }
+
+    ndarray operator[] (const std::tuple<int, int, int>& slice) const {
+        return ndarray(xt::view(_array, xt::range(std::get<0>(slice), std::get<1>(slice), std::get<2>(slice))));
+    }
+
+    template <typename... Args>
+    T operator() (Args... args) const {
+        return _array(args...);
+    }
+
+    void set_item(int index, const ndarray<T>& value) { xt::view(_array, index, xt::all()) = value.get_array(); }
+
+    void set_item(int i1, int i2, const ndarray<T>& value) { xt::view(_array, i1, i2, xt::all()) = value.get_array(); }
+
+    void set_item(int i1, int i2, int i3, const ndarray<T>& value) { xt::view(_array, i1, i2, i3, xt::all()) = value.get_array(); }
+
+    void set_item(int i1, int i2, int i3, int i4, const ndarray<T>& value) { xt::view(_array, i1, i2, i3, i4, xt::all()) = value.get_array(); }
+
+    void set_item(int i1, int i2, int i3, int i4, int i5, const ndarray<T>& value) { xt::view(_array, i1, i2, i3, i4, i5, xt::all()) = value.get_array(); }
+
+    void set_item(const std::vector<int>& indices, const ndarray<T>& value) {
+        xt::view(_array, xt::keep(indices)) = value.get_array();
+    }
+
+    void set_item(const std::tuple<int, int, int>& slice, const ndarray<T>& value) {
+        xt::view(_array, xt::range(std::get<0>(slice), std::get<1>(slice), std::get<2>(slice))) = value.get_array();
+    }
+
+    void set_item(int i1, int i2, T value) { xt::view(_array, i1, i2) = value; }
+
+    void set_item(int i1, int i2, int i3, T value) { xt::view(_array, i1, i2, i3) = value; }
+
+    void set_item(int i1, int i2, int i3, int i4, T value) { xt::view(_array, i1, i2, i3, i4) = value; }
+
+    void set_item(int i1, int i2, int i3, int i4, int i5, T value) { xt::view(_array, i1, i2, i3, i4, i5) = value; }
+
+    // Boolean Functions
+    bool all() const { return xt::all(_array); }
+
+    bool any() const { return xt::any(_array); }
+
+    // Aggregate Functions
+    T sum() const { return (xt::sum(_array))[0]; }
+
+    ndarray<T> sum(int axis) const {
+        xt::xarray<T> result = xt::sum(_array, {axis});
+        return ndarray<T>(result);
+    }
+
+    ndarray<T> sum(const _ShapeLike& axis) const {
+        xt::xarray<T> result = xt::sum(_array, axis);
+        return ndarray<T>(result);
+    }
+
+    T prod() const { return (xt::prod(_array))[0]; }
+
+    ndarray<T> prod(int axis) const {
+        xt::xarray<T> result = xt::prod(_array, {axis});
+        return ndarray<T>(result);
+    }
+
+    ndarray<T> prod(const _ShapeLike& axes) const {
+        xt::xarray<T> result = xt::prod(_array, axes);
+        return ndarray<T>(result);
+    }
+
+    T min() const { return (xt::amin(_array))[0]; }
+
+    ndarray<T> min(int axis) const {
+        xt::xarray<T> result = xt::amin(_array, {axis});
+        return ndarray<T>(result);
+    }
+
+    ndarray<T> min(const _ShapeLike& axes) const {
+        xt::xarray<T> result = xt::amin(_array, axes);
+        return ndarray<T>(result);
+    }
+
+    T max() const { return (xt::amax(_array))[0]; }
+
+    ndarray<T> max(int axis) const {
+        xt::xarray<T> result = xt::amax(_array, {axis});
+        return ndarray<T>(result);
+    }
+
+    ndarray<T> max(const _ShapeLike& axes) const {
+        xt::xarray<T> result = xt::amax(_array, axes);
+        return ndarray<T>(result);
+    }
+
+    pkpy::float64 mean() const { return (xt::mean(_array))[0]; }
+
+    ndarray<pkpy::float64> mean(int axis) const {
+        return ndarray<pkpy::float64>(xt::mean(_array, {axis}));
+    }
+
+    ndarray<pkpy::float64> mean(const _ShapeLike& axes) const {
+        return ndarray<pkpy::float64>(xt::mean(_array, axes));
+    }
+
+    pkpy::float64 std() const { return (xt::stddev(_array))[0]; }
+
+    ndarray<pkpy::float64> std(int axis) const {
+        return ndarray<pkpy::float64>(xt::stddev(_array, {axis}));
+    }
+
+    ndarray<pkpy::float64> std(const _ShapeLike& axes) const {
+        return ndarray<pkpy::float64>(xt::stddev(_array, axes));
+    }
+
+    pkpy::float64 var() const { return (xt::variance(_array))[0]; }
+
+    ndarray<pkpy::float64> var(int axis) const {
+        return ndarray<pkpy::float64>(xt::variance(_array, {axis}));
+    }
+
+    ndarray<pkpy::float64> var(const _ShapeLike& axes) const {
+        return ndarray<pkpy::float64>(xt::variance(_array, axes));
+    }
+
+    // Searching and Sorting Functions
+    pkpy::int64 argmin() const { return (xt::argmin(_array))[0]; }
+
+    ndarray<T> argmin(int axis) const {
+        xt::xarray<T> result = xt::argmin(_array, {axis});
+        return ndarray<T>(result);
+    }
+
+    pkpy::int64 argmax() const { return (xt::argmax(_array))[0]; }
+
+    ndarray<T> argmax(int axis) const {
+        xt::xarray<T> result = xt::argmax(_array, {axis});
+        return ndarray<T>(result);
+    }
+
+    ndarray<T> argsort() const { return ndarray<T>(xt::argsort(_array)); }
+
+    ndarray<T> argsort(int axis) const {
+        xt::xarray<T> result = xt::argsort(_array, {axis});
+        return ndarray<T>(result);
+    }
+
+    ndarray<T> sort() const { return ndarray<T>(xt::sort(_array)); }
+
+    ndarray<T> sort(int axis) const {
+        xt::xarray<T> result = xt::sort(_array, {axis});
+        return ndarray<T>(result);
+    }
+
+    // Shape Manipulation Functions
+    ndarray<T> reshape(const _ShapeLike& shape) const {
+        xt::xarray<T> dummy = _array;
+        dummy.reshape(shape);
+        return ndarray<T>(dummy);
+    }
+
+    // Does not preserve elements if expected size is not equal to the current size.
+    // https://github.com/xtensor-stack/xtensor/issues/1445
+    ndarray<T> resize(const _ShapeLike& shape) const {
+        xt::xarray<T> dummy = _array;
+        dummy.resize(shape);
+        return ndarray<T>(dummy);
+    }
+
+    ndarray<T> squeeze() const { return ndarray<T>(xt::squeeze(_array)); }
+
+    ndarray<T> squeeze(int axis) const {
+        xt::xarray<T> result = xt::squeeze(_array, {axis});
+        return ndarray<T>(result);
+    }
+
+    ndarray<T> transpose() const { return ndarray<T>(xt::transpose(_array)); }
+
+    ndarray<T> transpose(const _ShapeLike& permutation) const { return ndarray<T>(xt::transpose(_array, permutation)); }
+
+    template <typename... Args>
+    ndarray<T> transpose(Args... args) const {
+        xt::xarray<T> result = xt::transpose(_array, {args...});
+        return ndarray<T>(result);
+    }
+
+    ndarray<T> repeat(int repeats, int axis) const { return ndarray<T>(xt::repeat(_array, repeats, axis)); }
+
+    ndarray<T> repeat(const std::vector<size_t>& repeats, int axis) const {
+        return ndarray<T>(xt::repeat(_array, repeats, axis));
+    }
+
+    ndarray<T> flatten() const { return ndarray<T>(xt::flatten(_array)); }
+
+    // Miscellaneous Functions
+    ndarray<T> round() const { return ndarray<T>(xt::round(_array)); }
+
+    template <typename U>
+    ndarray<U> astype() const {
+        xt::xarray<U> result = xt::cast<U>(_array);
+        return ndarray<U>(result);
+    }
+
+    ndarray<T> copy() const {
+        ndarray<T> result = *this;
+        return result;
+    }
+
+    std::vector<T> to_list() const {
+        std::vector<T> vec;
+        for(auto &it : _array) {
+            vec.push_back(it);
+        }
+        return vec;
+    }
+
+private:
+    xt::xarray<T> _array;
+};
+
+class random {
+public:
+    random() {
+        auto seed = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+        xt::random::seed(static_cast<xt::random::seed_type>(seed));
+    }
+
+    template <typename T>
+    static T rand() {
+        random random_instance;
+        return (xt::random::rand<T>(std::vector{1}))[0];
+    }
+
+    template <typename T>
+    static ndarray<T> rand(const _ShapeLike& shape) {
+        random random_instance;
+        return ndarray<T>(xt::random::rand<T>(shape));
+    }
+
+    template <typename T>
+    static T randn() {
+        random random_instance;
+        return (xt::random::randn<T>(std::vector{1}))[0];
+    }
+
+    template <typename T>
+    static ndarray<T> randn(const _ShapeLike& shape) {
+        random random_instance;
+        return ndarray<T>(xt::random::randn<T>(shape));
+    }
+
+    template <typename T>
+    static int randint(T low, T high) {
+        random random_instance;
+        return (xt::random::randint<T>(std::vector{1}, low, high))[0];
+    }
+
+    template <typename T>
+    static ndarray<T> randint(T low, T high, const _ShapeLike& shape) {
+        random random_instance;
+        return ndarray<T>(xt::random::randint<T>(shape, low, high));
+    }
+
+    template <typename T>
+    static ndarray<T> uniform(T low, T high, const _ShapeLike& shape) {
+        random random_instance;
+        return ndarray<T>(xt::random::rand<T>(shape, low, high));
+    }
+};
+template<typename T, typename U>
+xt::xarray<std::common_type_t<T, U>> matrix_mul(const xt::xarray<T>& a, const xt::xarray<U>& b) {
+    using result_type = std::common_type_t<T, U>;
+    using Mat = xt::xarray<result_type>;
+    
+    bool first_is_1d = false;
+    bool second_is_1d = false;
+
+    xt::xarray<T> a_copy = a;
+    xt::xarray<U> b_copy = b;
+
+    if (a.dimension() == 1) {
+        first_is_1d = true;
+        a_copy = xt::reshape_view(a_copy, {1, 3});
+    }
+    if(b_copy.dimension() == 1) {
+        second_is_1d = true;
+        b_copy = xt::reshape_view(b_copy, {3, 1});
+    }
+    if (a_copy.dimension() == 2 && b_copy.dimension() == 2) {
+        int m = static_cast<int>(a_copy.shape()[0]);
+        int n = static_cast<int>(a_copy.shape()[1]);
+        int p = static_cast<int>(b_copy.shape()[1]);
+
+        Mat result = xt::zeros<result_type>({m, p});
+
+        for (int i = 0; i < m; i++) {
+            for (int j = 0; j < p; j++) {
+                for (int k = 0; k < n; k++) {
+                    result(i, j) = result(i, j) + a_copy(i, k) * b_copy(k, j);
+                }
+            }
+        }
+
+        if (first_is_1d) {
+            result = xt::squeeze(result, std::vector<std::size_t>{result.dimension()-2});
+        }
+        if (second_is_1d) {
+            result = xt::squeeze(result, std::vector<std::size_t>{result.dimension()-1});
+        }
+
+        return result;
+    }
+    else {
+        if (a_copy.dimension() == b_copy.dimension()) {
+            assert(a_copy.shape()[0] == b_copy.shape()[0]);
+            size_t layers = a_copy.shape()[0];
+            
+            Mat sub;
+            {
+                Mat a0 = xt::view(a_copy, 0);
+                Mat b0 = xt::view(b_copy, 0);
+                sub = matrix_mul(a0, b0);
+            }
+
+            auto out_shape = sub.shape();
+            out_shape.insert(out_shape.begin(), layers);
+            auto result = Mat::from_shape(out_shape);
+            xt::view(result, 0) = sub;
+
+            for (size_t i = 1; i < layers; i++) {
+                Mat ai = xt::view(a_copy, i);
+                Mat bi = xt::view(b_copy, i);
+                xt::view(result, i) = matrix_mul(ai, bi);
+            }
+
+            if (first_is_1d) {
+                result = xt::squeeze(result, std::vector<std::size_t>{result.dimension()-2});
+            }
+            if (second_is_1d) {
+                result = xt::squeeze(result, std::vector<std::size_t>{result.dimension()-1});
+            }
+
+            return result;
+        } else if (a_copy.dimension() > b_copy.dimension()) {
+            assert(a_copy.dimension() > b_copy.dimension());
+            size_t layers = a_copy.shape()[0];
+                
+            Mat sub;
+            {
+                Mat a0 = xt::view(a_copy, 0);
+                sub = matrix_mul(a0, b_copy);
+            }
+
+            auto out_shape = sub.shape();
+            out_shape.insert(out_shape.begin(), layers);
+            auto result = Mat::from_shape(out_shape);
+            xt::view(result, 0) = sub;
+
+            for (size_t i = 1; i < layers; i++) {
+                Mat ai = xt::view(a_copy, i);
+                xt::view(result, i) = matrix_mul(ai, b_copy);
+            }
+
+            if (first_is_1d) {
+                result = xt::squeeze(result, std::vector<std::size_t>{result.dimension()-2});
+            }
+            if (second_is_1d) {
+                result = xt::squeeze(result, std::vector<std::size_t>{result.dimension()-1});
+            }
+
+            return result;
+        } else {
+            assert(a_copy.dimension() < b_copy.dimension());
+            size_t layers = b_copy.shape()[0];
+                
+            Mat sub;
+            {
+                Mat b0 = xt::view(b_copy, 0);
+                sub = matrix_mul(a_copy, b0);
+            }
+
+            auto out_shape = sub.shape();
+            out_shape.insert(out_shape.begin(), layers);
+            auto result = Mat::from_shape(out_shape);
+            xt::view(result, 0) = sub;
+
+            for (size_t i = 1; i < layers; i++) {
+                Mat bi = xt::view(b_copy, i);
+                xt::view(result, i) = matrix_mul(a_copy, bi);
+            }
+
+            if (first_is_1d) {
+                result = xt::squeeze(result, std::vector<std::size_t>{result.dimension()-2});
+            }
+            if (second_is_1d) {
+                result = xt::squeeze(result, std::vector<std::size_t>{result.dimension()-1});
+            }
+
+            return result;
+        }
+    }
+}
+    
+template <typename T, typename U>
+ndarray<std::common_type_t<T, U>> matmul(const ndarray<T>& a, const ndarray<U>& b) {
+    return ndarray<std::common_type_t<T, U>>(matrix_mul(a.get_array(), b.get_array()));
+}
+
+template <typename T>
+ndarray<T> adapt(const std::vector<T>& init_list) {
+    return ndarray<T>(xt::adapt(init_list));
+}
+
+// Build a 2-D ndarray from a nested vector.
+// Assumes the input is rectangular (every row matches the first row's length).
+template <typename T>
+ndarray<T> adapt(const std::vector<std::vector<T>>& init_list) {
+    const size_t rows = init_list.size();
+    const size_t cols = rows ? init_list[0].size() : 0;  // guard: avoid UB on empty input
+    std::vector<T> flat_list;
+    flat_list.reserve(rows * cols);  // one allocation instead of repeated growth
+    for (const auto& row : init_list) {
+        flat_list.insert(flat_list.end(), row.begin(), row.end());
+    }
+    std::vector<size_t> sh = {rows, cols};
+    return ndarray<T>(xt::adapt(flat_list, sh));
+}
+
+// Build a 3-D ndarray from a nested vector.
+// Assumes the input is rectangular at every nesting level.
+template <typename T>
+ndarray<T> adapt(const std::vector<std::vector<std::vector<T>>>& init_list) {
+    const size_t d0 = init_list.size();
+    const size_t d1 = d0 ? init_list[0].size() : 0;       // guards avoid indexing
+    const size_t d2 = d1 ? init_list[0][0].size() : 0;    // into empty vectors
+    std::vector<T> flat_list;
+    flat_list.reserve(d0 * d1 * d2);
+    for (const auto& mat : init_list) {
+        for (const auto& row : mat) {
+            flat_list.insert(flat_list.end(), row.begin(), row.end());
+        }
+    }
+    std::vector<size_t> sh = {d0, d1, d2};
+    return ndarray<T>(xt::adapt(flat_list, sh));
+}
+
+// Build a 4-D ndarray from a nested vector.
+// Assumes the input is rectangular at every nesting level.
+template <typename T>
+ndarray<T> adapt(const std::vector<std::vector<std::vector<std::vector<T>>>>& init_list) {
+    const size_t d0 = init_list.size();
+    const size_t d1 = d0 ? init_list[0].size() : 0;          // guards avoid indexing
+    const size_t d2 = d1 ? init_list[0][0].size() : 0;       // into empty vectors
+    const size_t d3 = d2 ? init_list[0][0][0].size() : 0;
+    std::vector<T> flat_list;
+    flat_list.reserve(d0 * d1 * d2 * d3);
+    for (const auto& cube : init_list) {
+        for (const auto& mat : cube) {
+            for (const auto& row : mat) {
+                flat_list.insert(flat_list.end(), row.begin(), row.end());
+            }
+        }
+    }
+    std::vector<size_t> sh = {d0, d1, d2, d3};
+    return ndarray<T>(xt::adapt(flat_list, sh));
+}
+
+// Build a 5-D ndarray from a nested vector.
+// Assumes the input is rectangular at every nesting level.
+template <typename T>
+ndarray<T> adapt(const std::vector<std::vector<std::vector<std::vector<std::vector<T>>>>>& init_list) {
+    const size_t d0 = init_list.size();
+    const size_t d1 = d0 ? init_list[0].size() : 0;             // guards avoid indexing
+    const size_t d2 = d1 ? init_list[0][0].size() : 0;          // into empty vectors
+    const size_t d3 = d2 ? init_list[0][0][0].size() : 0;
+    const size_t d4 = d3 ? init_list[0][0][0][0].size() : 0;
+    std::vector<T> flat_list;
+    flat_list.reserve(d0 * d1 * d2 * d3 * d4);
+    for (const auto& block : init_list) {
+        for (const auto& cube : block) {
+            for (const auto& mat : cube) {
+                for (const auto& row : mat) {
+                    flat_list.insert(flat_list.end(), row.begin(), row.end());
+                }
+            }
+        }
+    }
+    std::vector<size_t> sh = {d0, d1, d2, d3, d4};
+    return ndarray<T>(xt::adapt(flat_list, sh));
+}
+
+// Array Creation
+// Build an ndarray<U> from a flat vector, casting elements to U. An empty
+// shape yields a 1-D array over all elements; otherwise the requested
+// extents are applied.
+template <typename U, typename T>
+ndarray<U> array(const std::vector<T>& vec, const _ShapeLike& shape = {}) {
+    if (!shape.empty()) {
+        return ndarray<U>(xt::cast<U>(xt::adapt(vec, shape)));
+    }
+    return ndarray<U>(xt::cast<U>(xt::adapt(vec)));
+}
+
+// All-zero array of the requested shape.
+template <typename T>
+ndarray<T> zeros(const _ShapeLike& shape) {
+    xt::xarray<T> values = xt::zeros<T>(shape);
+    return ndarray<T>(values);
+}
+
+// All-one array of the requested shape.
+template <typename T>
+ndarray<T> ones(const _ShapeLike& shape) {
+    xt::xarray<T> values = xt::ones<T>(shape);
+    return ndarray<T>(values);
+}
+
+// Array of the requested shape with every element set to fill_value.
+template <typename T>
+ndarray<T> full(const _ShapeLike& shape, const T& fill_value) {
+    // Materialize the container once, then broadcast-assign the value with
+    // fill() instead of writing each element through an explicit iterator loop.
+    xt::xarray<T> result = xt::ones<T>(shape);
+    result.fill(fill_value);
+    return ndarray<T>(result);
+}
+
+// n x n identity matrix.
+template <typename T>
+ndarray<T> identity(int n) {
+    xt::xarray<T> eye = xt::eye<T>(n);
+    return ndarray<T>(eye);
+}
+
+// Evenly spaced values in [0, stop) with step 1.
+template <typename T>
+ndarray<T> arange(const T& stop) {
+    xt::xarray<T> seq = xt::arange<T>(stop);
+    return ndarray<T>(seq);
+}
+
+// Evenly spaced values in [start, stop) with step 1.
+template <typename T>
+ndarray<T> arange(const T& start, const T& stop) {
+    xt::xarray<T> seq = xt::arange<T>(start, stop);
+    return ndarray<T>(seq);
+}
+
+// Evenly spaced values in [start, stop) with the given step.
+template <typename T>
+ndarray<T> arange(const T& start, const T& stop, const T& step) {
+    xt::xarray<T> seq = xt::arange<T>(start, stop, step);
+    return ndarray<T>(seq);
+}
+
+// num evenly spaced samples over [start, stop]; endpoint controls whether
+// stop itself is included.
+template <typename T>
+ndarray<T> linspace(const T& start, const T& stop, int num = 50, bool endpoint = true) {
+    xt::xarray<T> samples = xt::linspace<T>(start, stop, num, endpoint);
+    return ndarray<T>(samples);
+}
+
+// Trigonometry
+// Element-wise sin/cos/tan. Real-valued inputs produce float_ arrays;
+// the complex overloads keep complex results.
+template <typename T>
+ndarray<float_> sin(const ndarray<T>& arr) {
+    xt::xarray<float_> values = xt::sin(arr.get_array());
+    return ndarray<float_>(values);
+}
+
+ndarray<complex_> sin(const ndarray<complex64>& arr) {
+    return ndarray<complex_>(xt::sin(arr.get_array()));
+}
+
+ndarray<complex_> sin(const ndarray<complex128>& arr) {
+    return ndarray<complex_>(xt::sin(arr.get_array()));
+}
+
+template <typename T>
+ndarray<float_> cos(const ndarray<T>& arr) {
+    xt::xarray<float_> values = xt::cos(arr.get_array());
+    return ndarray<float_>(values);
+}
+
+ndarray<complex_> cos(const ndarray<complex64>& arr) {
+    return ndarray<complex_>(xt::cos(arr.get_array()));
+}
+
+ndarray<complex_> cos(const ndarray<complex128>& arr) {
+    return ndarray<complex_>(xt::cos(arr.get_array()));
+}
+
+template <typename T>
+ndarray<float_> tan(const ndarray<T>& arr) {
+    xt::xarray<float_> values = xt::tan(arr.get_array());
+    return ndarray<float_>(values);
+}
+
+ndarray<complex_> tan(const ndarray<complex64>& arr) {
+    return ndarray<complex_>(xt::tan(arr.get_array()));
+}
+
+ndarray<complex_> tan(const ndarray<complex128>& arr) {
+    return ndarray<complex_>(xt::tan(arr.get_array()));
+}
+
+// Element-wise inverse trigonometry (asin/acos/atan). Real-valued inputs
+// produce float_ arrays; the complex overloads keep complex results.
+template <typename T>
+ndarray<float_> arcsin(const ndarray<T>& arr) {
+    xt::xarray<float_> values = xt::asin(arr.get_array());
+    return ndarray<float_>(values);
+}
+
+ndarray<complex_> arcsin(const ndarray<complex64>& arr) {
+    return ndarray<complex_>(xt::asin(arr.get_array()));
+}
+
+ndarray<complex_> arcsin(const ndarray<complex128>& arr) {
+    return ndarray<complex_>(xt::asin(arr.get_array()));
+}
+
+template <typename T>
+ndarray<float_> arccos(const ndarray<T>& arr) {
+    xt::xarray<float_> values = xt::acos(arr.get_array());
+    return ndarray<float_>(values);
+}
+
+ndarray<complex_> arccos(const ndarray<complex64>& arr) {
+    return ndarray<complex_>(xt::acos(arr.get_array()));
+}
+
+ndarray<complex_> arccos(const ndarray<complex128>& arr) {
+    return ndarray<complex_>(xt::acos(arr.get_array()));
+}
+
+template <typename T>
+ndarray<float_> arctan(const ndarray<T>& arr) {
+    xt::xarray<float_> values = xt::atan(arr.get_array());
+    return ndarray<float_>(values);
+}
+
+ndarray<complex_> arctan(const ndarray<complex64>& arr) {
+    return ndarray<complex_>(xt::atan(arr.get_array()));
+}
+
+ndarray<complex_> arctan(const ndarray<complex128>& arr) {
+    return ndarray<complex_>(xt::atan(arr.get_array()));
+}
+
+// Exponents and Logarithms
+// Element-wise exp/log family. Real-valued inputs produce float_ arrays;
+// the complex overloads keep complex results.
+template <typename T>
+ndarray<float_> exp(const ndarray<T>& arr) {
+    xt::xarray<float_> values = xt::exp(arr.get_array());
+    return ndarray<float_>(values);
+}
+
+ndarray<complex_> exp(const ndarray<complex64>& arr) {
+    return ndarray<complex_>(xt::exp(arr.get_array()));
+}
+
+ndarray<complex_> exp(const ndarray<complex128>& arr) {
+    return ndarray<complex_>(xt::exp(arr.get_array()));
+}
+
+// Natural logarithm (base e).
+template <typename T>
+ndarray<float_> log(const ndarray<T>& arr) {
+    xt::xarray<float_> values = xt::log(arr.get_array());
+    return ndarray<float_>(values);
+}
+
+ndarray<complex_> log(const ndarray<complex64>& arr) {
+    return ndarray<complex_>(xt::log(arr.get_array()));
+}
+
+ndarray<complex_> log(const ndarray<complex128>& arr) {
+    return ndarray<complex_>(xt::log(arr.get_array()));
+}
+
+// Base-2 logarithm.
+template <typename T>
+ndarray<float_> log2(const ndarray<T>& arr) {
+    xt::xarray<float_> values = xt::log2(arr.get_array());
+    return ndarray<float_>(values);
+}
+
+// Base-10 logarithm.
+template <typename T>
+ndarray<float_> log10(const ndarray<T>& arr) {
+    xt::xarray<float_> values = xt::log10(arr.get_array());
+    return ndarray<float_>(values);
+}
+
+// Miscellaneous
+// Element-wise rounding helpers; each preserves the input element type.
+template <typename T>
+ndarray<T> round(const ndarray<T>& arr) {
+    xt::xarray<T> values = xt::round(arr.get_array());
+    return ndarray<T>(values);
+}
+
+template <typename T>
+ndarray<T> floor(const ndarray<T>& arr) {
+    xt::xarray<T> values = xt::floor(arr.get_array());
+    return ndarray<T>(values);
+}
+
+template <typename T>
+ndarray<T> ceil(const ndarray<T>& arr) {
+    xt::xarray<T> values = xt::ceil(arr.get_array());
+    return ndarray<T>(values);
+}
+
+// Element-wise absolute value. Complex magnitudes are real-valued, so
+// complex inputs map to float_; every other element type is preserved.
+template <typename T>
+auto abs(const ndarray<T>& arr) {
+    constexpr bool complex_input = std::is_same_v<T, complex64> || std::is_same_v<T, complex128>;
+    if constexpr (complex_input) {
+        return ndarray<float_>(xt::abs(arr.get_array()));
+    } else {
+        return ndarray<T>(xt::abs(arr.get_array()));
+    }
+}
+
+// Xtensor only supports concatenation of initialized objects.
+// https://github.com/xtensor-stack/xtensor/issues/1450
+// Join two arrays along the given axis; both operands are materialized at
+// the common element type first (see the issue above).
+template <typename T, typename U>
+auto concatenate(const ndarray<T>& arr1, const ndarray<U>& arr2, int axis = 0) {
+    using result_type = std::common_type_t<T, U>;
+    xt::xarray<result_type> lhs = xt::cast<result_type>(arr1.get_array());
+    xt::xarray<result_type> rhs = xt::cast<result_type>(arr2.get_array());
+    return ndarray<result_type>(xt::concatenate(xt::xtuple(lhs, rhs), axis));
+}
+
+// Constants
+constexpr float_ pi = xt::numeric_constants<double>::PI;  // π, taken from xtensor's numeric constants
+constexpr double inf = std::numeric_limits<double>::infinity();  // positive infinity
+
+// Testing Functions
+// Element-wise approximate equality of two arrays within relative tolerance
+// rtol and absolute tolerance atol (delegates to xt::allclose).
+template <typename T, typename U>
+bool allclose(const ndarray<T>& arr1, const ndarray<U>& arr2, float_ rtol = 1e-5, float_ atol = 1e-8) {
+    const auto& lhs = arr1.get_array();
+    const auto& rhs = arr2.get_array();
+    return xt::allclose(lhs, rhs, rtol, atol);
+}
+
+// Reverse Dunder Methods
+// scalar + array. A float_ scalar forces a float_ result; any other scalar
+// type promotes through std::common_type.
+template <typename T, typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+auto operator+ (const U& scalar, const ndarray<T>& array) {
+    xt::xarray<T> values = array.get_array();
+    if constexpr (!std::is_same_v<U, float_>) {
+        using result_type = std::common_type_t<T, U>;
+        xt::xarray<result_type> result = scalar + xt::cast<result_type>(values);
+        return ndarray<result_type>(result);
+    } else {
+        xt::xarray<float_> result = scalar + xt::cast<float_>(values);
+        return ndarray<float_>(result);
+    }
+}
+
+// scalar - array. A float_ scalar forces a float_ result; any other scalar
+// type promotes through std::common_type.
+template <typename T, typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+auto operator- (const U& scalar, const ndarray<T>& array) {
+    xt::xarray<T> values = array.get_array();
+    if constexpr (!std::is_same_v<U, float_>) {
+        using result_type = std::common_type_t<T, U>;
+        xt::xarray<result_type> result = scalar - xt::cast<result_type>(values);
+        return ndarray<result_type>(result);
+    } else {
+        xt::xarray<float_> result = scalar - xt::cast<float_>(values);
+        return ndarray<float_>(result);
+    }
+}
+
+// scalar * array. A float_ scalar forces a float_ result; any other scalar
+// type promotes through std::common_type.
+template <typename T, typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+auto operator* (const U& scalar, const ndarray<T>& array) {
+    xt::xarray<T> values = array.get_array();
+    if constexpr (!std::is_same_v<U, float_>) {
+        using result_type = std::common_type_t<T, U>;
+        xt::xarray<result_type> result = scalar * xt::cast<result_type>(values);
+        return ndarray<result_type>(result);
+    } else {
+        xt::xarray<float_> result = scalar * xt::cast<float_>(values);
+        return ndarray<float_>(result);
+    }
+}
+
+// scalar / array: both operands are cast to float_, so the division always
+// happens in floating point.
+template <typename T, typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+auto operator/ (const U& scalar, const ndarray<T>& array) {
+    xt::xarray<T> values = array.get_array();
+    xt::xarray<float_> quotient = static_cast<float_>(scalar) / xt::cast<float_>(values);
+    return ndarray<float_>(quotient);
+}
+
+// scalar ** array, evaluated element-wise in floating point.
+template <typename T, typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+auto pow(const U& scalar, const ndarray<T>& array) {
+    xt::xarray<T> values = array.get_array();
+    xt::xarray<float_> powered = xt::pow(scalar, xt::cast<float_>(values));
+    return ndarray<float_>(powered);
+}
+
+// scalar-on-the-left bitwise operators: these operations commute, so each
+// simply forwards to the array-on-the-left overload.
+template <typename T, typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+auto operator& (const U& scalar, const ndarray<T>& array) {
+    return array & scalar;  // bitwise AND commutes
+}
+
+template <typename T, typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+auto operator| (const U& scalar, const ndarray<T>& array) {
+    return array | scalar;  // bitwise OR commutes
+}
+
+template <typename T, typename U, typename = std::enable_if_t<!is_ndarray_v<U>>>
+auto operator^ (const U& scalar, const ndarray<T>& array) {
+    return array ^ scalar;  // bitwise XOR commutes
+}
+
+// Stream output: delegate formatting to xtensor's printer for the
+// underlying container.
+template <typename T>
+std::ostream& operator<< (std::ostream& os, const ndarray<T>& arr) {
+    return os << arr.get_array();
+}
+
+}  // namespace numpy
+}  // namespace pkpy

+ 347 - 0
3rd/numpy/include/xtensor/xaccessible.hpp

@@ -0,0 +1,347 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_ACCESSIBLE_HPP
+#define XTENSOR_ACCESSIBLE_HPP
+
+#include "xexception.hpp"
+#include "xstrides.hpp"
+#include "xtensor_forward.hpp"
+
+namespace xt
+{
+    /**
+     * @class xconst_accessible
+     * @brief Base class for implementation of common expression constant access methods.
+     *
+     * The xaccessible class implements constant access methods common to all expressions.
+     *
+     * @tparam D The derived type, i.e. the inheriting class for which xconst_accessible
+     *           provides the interface.
+     */
+    template <class D>
+    class xconst_accessible
+    {
+    public:
+
+        // Types forwarded from the derived container's inner types.
+        using derived_type = D;
+        using inner_types = xcontainer_inner_types<D>;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+        using size_type = typename inner_types::size_type;
+
+        // Shape queries.
+        size_type size() const noexcept;
+        size_type dimension() const noexcept;
+        size_type shape(size_type index) const;
+
+        // Bounds-checked element access.
+        template <class... Args>
+        const_reference at(Args... args) const;
+
+        // Unchecked access by index container, initializer list, or single index.
+        template <class S>
+        disable_integral_t<S, const_reference> operator[](const S& index) const;
+        template <class I>
+        const_reference operator[](std::initializer_list<I> index) const;
+        const_reference operator[](size_type i) const;
+
+        // Access with periodic (wrap-around) index normalization.
+        template <class... Args>
+        const_reference periodic(Args... args) const;
+
+        template <class... Args>
+        bool in_bounds(Args... args) const;
+
+        const_reference front() const;
+        const_reference back() const;
+
+    protected:
+
+        // Only derived classes may construct/copy/move this CRTP base.
+        xconst_accessible() = default;
+        ~xconst_accessible() = default;
+
+        xconst_accessible(const xconst_accessible&) = default;
+        xconst_accessible& operator=(const xconst_accessible&) = default;
+
+        xconst_accessible(xconst_accessible&&) = default;
+        xconst_accessible& operator=(xconst_accessible&&) = default;
+
+    private:
+
+        // CRTP downcast to the derived expression.
+        const derived_type& derived_cast() const noexcept;
+    };
+
+    /**
+     * @class xaccessible
+     * @brief Base class for implementation of common expression access methods.
+     *
+     * The xaccessible class implements access methods common to all expressions.
+     *
+     * @tparam D The derived type, i.e. the inheriting class for which xaccessible
+     *           provides the interface.
+     */
+    template <class D>
+    class xaccessible : public xconst_accessible<D>
+    {
+    public:
+
+        using base_type = xconst_accessible<D>;
+        using derived_type = typename base_type::derived_type;
+        using reference = typename base_type::reference;
+        using size_type = typename base_type::size_type;
+
+        // Mutable counterparts of the const access methods in the base.
+        template <class... Args>
+        reference at(Args... args);
+
+        template <class S>
+        disable_integral_t<S, reference> operator[](const S& index);
+        template <class I>
+        reference operator[](std::initializer_list<I> index);
+        reference operator[](size_type i);
+
+        template <class... Args>
+        reference periodic(Args... args);
+
+        reference front();
+        reference back();
+
+        // Keep the const overloads from the base visible alongside the
+        // mutable ones declared above.
+        using base_type::at;
+        using base_type::operator[];
+        using base_type::back;
+        using base_type::front;
+        using base_type::periodic;
+
+    protected:
+
+        // Only derived classes may construct/copy/move this CRTP base.
+        xaccessible() = default;
+        ~xaccessible() = default;
+
+        xaccessible(const xaccessible&) = default;
+        xaccessible& operator=(const xaccessible&) = default;
+
+        xaccessible(xaccessible&&) = default;
+        xaccessible& operator=(xaccessible&&) = default;
+
+    private:
+
+        // CRTP downcast to the derived expression.
+        derived_type& derived_cast() noexcept;
+    };
+
+    /************************************
+     * xconst_accessible implementation *
+     ************************************/
+
+    /**
+     * Returns the size of the expression.
+     */
+    template <class D>
+    inline auto xconst_accessible<D>::size() const noexcept -> size_type
+    {
+        return compute_size(derived_cast().shape());
+    }
+
+    /**
+     * Returns the number of dimensions of the expression.
+     */
+    template <class D>
+    inline auto xconst_accessible<D>::dimension() const noexcept -> size_type
+    {
+        return derived_cast().shape().size();
+    }
+
+    /**
+     * Returns the extent of the ``index``-th dimension of the expression.
+     */
+    template <class D>
+    inline auto xconst_accessible<D>::shape(size_type index) const -> size_type
+    {
+        return derived_cast().shape()[index];
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the expression,
+     * after dimension and bounds checking.
+     * @param args a list of indices specifying the position in the expression. Indices
+     * must be unsigned integers, the number of indices should be equal to the number of dimensions
+     * of the expression.
+     * @exception std::out_of_range if the number of arguments is greater than the number of dimensions
+     * or if indices are out of bounds.
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xconst_accessible<D>::at(Args... args) const -> const_reference
+    {
+        check_access(derived_cast().shape(), args...);
+        return derived_cast().operator()(args...);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the expression.
+     * @param index a sequence of indices specifying the position in the expression. Indices
+     * must be unsigned integers, the number of indices in the list should be equal or greater
+     * than the number of dimensions of the expression.
+     */
+    template <class D>
+    template <class S>
+    inline auto xconst_accessible<D>::operator[](const S& index) const
+        -> disable_integral_t<S, const_reference>
+    {
+        return derived_cast().element(index.cbegin(), index.cend());
+    }
+
+    // Same as above, with the index sequence given as an initializer list.
+    template <class D>
+    template <class I>
+    inline auto xconst_accessible<D>::operator[](std::initializer_list<I> index) const -> const_reference
+    {
+        return derived_cast().element(index.begin(), index.end());
+    }
+
+    // Single-index access, forwarded to the derived operator().
+    template <class D>
+    inline auto xconst_accessible<D>::operator[](size_type i) const -> const_reference
+    {
+        return derived_cast().operator()(i);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the expression,
+     * after applying periodicity to the indices (negative and 'overflowing' indices are changed).
+     * @param args a list of indices specifying the position in the expression. Indices
+     * must be integers, the number of indices should be equal to the number of dimensions
+     * of the expression.
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xconst_accessible<D>::periodic(Args... args) const -> const_reference
+    {
+        normalize_periodic(derived_cast().shape(), args...);
+        return derived_cast()(static_cast<size_type>(args)...);
+    }
+
+    /**
+     * Returns a constant reference to the first element of the expression.
+     */
+    template <class D>
+    inline auto xconst_accessible<D>::front() const -> const_reference
+    {
+        return *derived_cast().begin();
+    }
+
+    /**
+     * Returns a constant reference to the last element of the expression.
+     */
+    template <class D>
+    inline auto xconst_accessible<D>::back() const -> const_reference
+    {
+        return *std::prev(derived_cast().end());
+    }
+
+    /**
+     * Returns ``true`` only if the specified position is a valid entry in the expression.
+     * @param args a list of indices specifying the position in the expression.
+     * @return bool
+     */
+    template <class D>
+    template <class... Args>
+    inline bool xconst_accessible<D>::in_bounds(Args... args) const
+    {
+        return check_in_bounds(derived_cast().shape(), args...);
+    }
+
+    // CRTP downcast to the derived expression.
+    template <class D>
+    inline auto xconst_accessible<D>::derived_cast() const noexcept -> const derived_type&
+    {
+        return *static_cast<const derived_type*>(this);
+    }
+
+    /******************************
+     * xaccessible implementation *
+     ******************************/
+
+    /**
+     * Returns a reference to the element at the specified position in the expression,
+     * after dimension and bounds checking.
+     * @param args a list of indices specifying the position in the expression. Indices
+     * must be unsigned integers, the number of indices should be equal to the number of dimensions
+     * of the expression.
+     * @exception std::out_of_range if the number of arguments is greater than the number of dimensions
+     * or if indices are out of bounds.
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xaccessible<D>::at(Args... args) -> reference
+    {
+        check_access(derived_cast().shape(), args...);
+        return derived_cast().operator()(args...);
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the expression.
+     * @param index a sequence of indices specifying the position in the expression. Indices
+     * must be unsigned integers, the number of indices in the list should be equal or greater
+     * than the number of dimensions of the expression.
+     */
+    template <class D>
+    template <class S>
+    inline auto xaccessible<D>::operator[](const S& index) -> disable_integral_t<S, reference>
+    {
+        return derived_cast().element(index.cbegin(), index.cend());
+    }
+
+    // Same as above, with the index sequence given as an initializer list.
+    template <class D>
+    template <class I>
+    inline auto xaccessible<D>::operator[](std::initializer_list<I> index) -> reference
+    {
+        return derived_cast().element(index.begin(), index.end());
+    }
+
+    // Single-index access, forwarded to the derived operator().
+    template <class D>
+    inline auto xaccessible<D>::operator[](size_type i) -> reference
+    {
+        return derived_cast().operator()(i);
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the expression,
+     * after applying periodicity to the indices (negative and 'overflowing' indices are changed).
+     * @param args a list of indices specifying the position in the expression. Indices
+     * must be integers, the number of indices should be equal to the number of dimensions
+     * of the expression.
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xaccessible<D>::periodic(Args... args) -> reference
+    {
+        normalize_periodic(derived_cast().shape(), args...);
+        // NOTE(review): unlike the const overload, args are forwarded without a
+        // static_cast to size_type here — confirm this asymmetry is intended.
+        return derived_cast()(args...);
+    }
+
+    /**
+     * Returns a reference to the first element of the expression.
+     */
+    template <class D>
+    inline auto xaccessible<D>::front() -> reference
+    {
+        return *derived_cast().begin();
+    }
+
+    /**
+     * Returns a reference to the last element of the expression.
+     */
+    template <class D>
+    inline auto xaccessible<D>::back() -> reference
+    {
+        return *std::prev(derived_cast().end());
+    }
+
+    // CRTP downcast to the derived expression.
+    template <class D>
+    inline auto xaccessible<D>::derived_cast() noexcept -> derived_type&
+    {
+        return *static_cast<derived_type*>(this);
+    }
+
+}
+
+#endif

+ 370 - 0
3rd/numpy/include/xtensor/xaccumulator.hpp

@@ -0,0 +1,370 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_ACCUMULATOR_HPP
+#define XTENSOR_ACCUMULATOR_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <numeric>
+#include <type_traits>
+
+#include "xexpression.hpp"
+#include "xstrides.hpp"
+#include "xtensor_config.hpp"
+#include "xtensor_forward.hpp"
+
+namespace xt
+{
+
+// Evaluation strategy used by accumulators when none is given explicitly.
+#define DEFAULT_STRATEGY_ACCUMULATORS evaluation_strategy::immediate_type
+
+    namespace detail
+    {
+        // Identity init functor used as the default INIT_FUNC of
+        // xaccumulator_functor; value_type is void when no init type is given.
+        template <class V = void>
+        struct accumulator_identity : xtl::identity
+        {
+            using value_type = V;
+        };
+    }
+
+    /**************
+     * accumulate *
+     **************/
+
+    // Pairs an accumulation functor with an (optional) initialization functor,
+    // stored together as a std::tuple base. The init functor defaults to the
+    // identity, whose value_type (void by default) drives result-type deduction.
+    template <class ACCUMULATE_FUNC, class INIT_FUNC = detail::accumulator_identity<void>>
+    struct xaccumulator_functor : public std::tuple<ACCUMULATE_FUNC, INIT_FUNC>
+    {
+        using self_type = xaccumulator_functor<ACCUMULATE_FUNC, INIT_FUNC>;
+        using base_type = std::tuple<ACCUMULATE_FUNC, INIT_FUNC>;
+        using accumulate_functor_type = ACCUMULATE_FUNC;
+        using init_functor_type = INIT_FUNC;
+        using init_value_type = typename init_functor_type::value_type;
+
+        xaccumulator_functor()
+            : base_type()
+        {
+        }
+
+        // Accumulation functor only; the init functor is default-constructed.
+        template <class RF>
+        xaccumulator_functor(RF&& accumulate_func)
+            : base_type(std::forward<RF>(accumulate_func), INIT_FUNC())
+        {
+        }
+
+        // Both functors supplied explicitly.
+        template <class RF, class IF>
+        xaccumulator_functor(RF&& accumulate_func, IF&& init_func)
+            : base_type(std::forward<RF>(accumulate_func), std::forward<IF>(init_func))
+        {
+        }
+    };
+
+    // Builds an xaccumulator_functor from a callable, deducing the stored
+    // (reference-stripped) functor type.
+    template <class RF>
+    auto make_xaccumulator_functor(RF&& accumulate_func)
+    {
+        return xaccumulator_functor<std::remove_reference_t<RF>>(std::forward<RF>(accumulate_func));
+    }
+
+    // Same as above, with an explicit initialization functor.
+    template <class RF, class IF>
+    auto make_xaccumulator_functor(RF&& accumulate_func, IF&& init_func)
+    {
+        return xaccumulator_functor<std::remove_reference_t<RF>, std::remove_reference_t<IF>>(
+            std::forward<RF>(accumulate_func),
+            std::forward<IF>(init_func)
+        );
+    }
+
+    namespace detail
+    {
+        // Fallback overloads for non-immediate evaluation strategies: lazy
+        // accumulation is not implemented, so these bodies only exist to fire
+        // the static_assert when such a strategy is selected. They are never
+        // meant to execute (hence no return statement).
+        template <class F, class E, class EVS>
+        xarray<typename std::decay_t<E>::value_type> accumulator_impl(F&&, E&&, std::size_t, EVS)
+        {
+            static_assert(
+                !std::is_same<evaluation_strategy::lazy_type, EVS>::value,
+                "Lazy accumulators not yet implemented."
+            );
+        }
+
+        // Axis-less variant of the stub above.
+        template <class F, class E, class EVS>
+        xarray<typename std::decay_t<E>::value_type> accumulator_impl(F&&, E&&, EVS)
+        {
+            static_assert(
+                !std::is_same<evaluation_strategy::lazy_type, EVS>::value,
+                "Lazy accumulators not yet implemented."
+            );
+        }
+
+        // Maps an input expression type T and a result value type R to the
+        // container type an accumulator should return, preserving the input's
+        // container kind, static rank/shape and layout where possible.
+        template <class T, class R>
+        struct xaccumulator_return_type
+        {
+            using type = xarray<R>;
+        };
+
+        template <class T, layout_type L, class R>
+        struct xaccumulator_return_type<xarray<T, L>, R>
+        {
+            using type = xarray<R, L>;
+        };
+
+        template <class T, std::size_t N, layout_type L, class R>
+        struct xaccumulator_return_type<xtensor<T, N, L>, R>
+        {
+            using type = xtensor<R, N, L>;
+        };
+
+        template <class T, std::size_t... I, layout_type L, class R>
+        struct xaccumulator_return_type<xtensor_fixed<T, xshape<I...>, L>, R>
+        {
+            using type = xtensor_fixed<R, xshape<I...>, L>;
+        };
+
+        template <class T, class R>
+        using xaccumulator_return_type_t = typename xaccumulator_return_type<T, R>::type;
+
+        // Declared here, defined elsewhere: computes the element count of a
+        // fixed xshape at compile time (used by the fixed specialization below).
+        template <class T>
+        struct fixed_compute_size;
+
+        // Like xaccumulator_return_type, but for flattened (linear, 1-D)
+        // accumulation results.
+        template <class T, class R>
+        struct xaccumulator_linear_return_type
+        {
+            using type = xtensor<R, 1>;
+        };
+
+        template <class T, layout_type L, class R>
+        struct xaccumulator_linear_return_type<xarray<T, L>, R>
+        {
+            using type = xtensor<R, 1, L>;
+        };
+
+        template <class T, std::size_t N, layout_type L, class R>
+        struct xaccumulator_linear_return_type<xtensor<T, N, L>, R>
+        {
+            using type = xtensor<R, 1, L>;
+        };
+
+        template <class T, std::size_t... I, layout_type L, class R>
+        struct xaccumulator_linear_return_type<xtensor_fixed<T, xshape<I...>, L>, R>
+        {
+            using type = xtensor_fixed<R, xshape<fixed_compute_size<xshape<I...>>::value>, L>;
+        };
+
+        template <class T, class R>
+        using xaccumulator_linear_return_type_t = typename xaccumulator_linear_return_type<T, R>::type;
+
+        // Applies f in place to every element of the first slice along `axis`
+        // (mutates e through e.storage()).
+        template <class F, class E>
+        inline auto accumulator_init_with_f(F&& f, E& e, std::size_t axis)
+        {
+            // this function is the equivalent (but hopefully faster) to (if axis == 1)
+            // e[:, 0, :, :, ...] = f(e[:, 0, :, :, ...])
+            // so that all "first" values are initialized in a first pass
+
+            std::size_t outer_loop_size, inner_loop_size, pos = 0;
+            std::size_t outer_stride, inner_stride;
+
+            // Loop extents are the products of the shape extents before and
+            // after `axis` (the axis itself is fixed at index 0).
+            auto set_loop_sizes = [&outer_loop_size, &inner_loop_size](auto first, auto last, std::ptrdiff_t ax)
+            {
+                outer_loop_size = std::accumulate(
+                    first,
+                    first + ax,
+                    std::size_t(1),
+                    std::multiplies<std::size_t>()
+                );
+                inner_loop_size = std::accumulate(
+                    first + ax + 1,
+                    last,
+                    std::size_t(1),
+                    std::multiplies<std::size_t>()
+                );
+            };
+
+            // Note: add check that strides > 0
+            // NOTE(review): each loop stride is the smallest stride among the
+            // dimensions on its side of `axis` — presumably the step between
+            // consecutive elements there; confirm against xtensor's layouts.
+            auto set_loop_strides = [&outer_stride, &inner_stride](auto first, auto last, std::ptrdiff_t ax)
+            {
+                outer_stride = static_cast<std::size_t>(ax == 0 ? 1 : *std::min_element(first, first + ax));
+                inner_stride = static_cast<std::size_t>(
+                    (ax == std::distance(first, last) - 1) ? 1 : *std::min_element(first + ax + 1, last)
+                );
+            };
+
+            set_loop_sizes(e.shape().begin(), e.shape().end(), static_cast<std::ptrdiff_t>(axis));
+            set_loop_strides(e.strides().begin(), e.strides().end(), static_cast<std::ptrdiff_t>(axis));
+
+            if (e.layout() == layout_type::column_major)
+            {
+                // swap for better memory locality (smaller stride in the inner loop)
+                std::swap(outer_loop_size, inner_loop_size);
+                std::swap(outer_stride, inner_stride);
+            }
+
+            // Walk the first slice and overwrite each element with f(element).
+            for (std::size_t i = 0; i < outer_loop_size; ++i)
+            {
+                pos = i * outer_stride;
+                for (std::size_t j = 0; j < inner_loop_size; ++j)
+                {
+                    e.storage()[pos] = f(e.storage()[pos]);
+                    pos += inner_stride;
+                }
+            }
+        }
+
+        template <class F, class E>
+        inline auto accumulator_impl(F&& f, E&& e, std::size_t axis, evaluation_strategy::immediate_type)
+        {
+            using init_type = typename F::init_value_type;
+            using accumulate_functor_type = typename F::accumulate_functor_type;
+            using expr_value_type = typename std::decay_t<E>::value_type;
+            // using return_type = std::conditional_t<std::is_same<init_type, void>::value, typename
+            // std::decay_t<E>::value_type, init_type>;
+
+            using return_type = std::decay_t<decltype(std::declval<accumulate_functor_type>()(
+                std::declval<init_type>(),
+                std::declval<expr_value_type>()
+            ))>;
+
+            using result_type = xaccumulator_return_type_t<std::decay_t<E>, return_type>;
+
+            if (axis >= e.dimension())
+            {
+                XTENSOR_THROW(std::runtime_error, "Axis larger than expression dimension in accumulator.");
+            }
+
+            result_type res = e;  // assign + make a copy, we need it anyways
+
+            if (res.shape(axis) != std::size_t(0))
+            {
+                std::size_t inner_stride = static_cast<std::size_t>(res.strides()[axis]);
+                std::size_t outer_stride = 1;  // either row- or column-wise (strides.back / strides.front)
+                std::size_t outer_loop_size = 0;
+                std::size_t inner_loop_size = 0;
+                std::size_t init_size = e.shape()[axis] != std::size_t(1) ? std::size_t(1) : std::size_t(0);
+
+                auto set_loop_sizes =
+                    [&outer_loop_size, &inner_loop_size, init_size](auto first, auto last, std::ptrdiff_t ax)
+                {
+                    outer_loop_size = std::accumulate(first, first + ax, init_size, std::multiplies<std::size_t>());
+
+                    inner_loop_size = std::accumulate(
+                        first + ax,
+                        last,
+                        std::size_t(1),
+                        std::multiplies<std::size_t>()
+                    );
+                };
+
+                if (result_type::static_layout == layout_type::row_major)
+                {
+                    set_loop_sizes(res.shape().cbegin(), res.shape().cend(), static_cast<std::ptrdiff_t>(axis));
+                }
+                else
+                {
+                    set_loop_sizes(res.shape().cbegin(), res.shape().cend(), static_cast<std::ptrdiff_t>(axis + 1));
+                    std::swap(inner_loop_size, outer_loop_size);
+                }
+
+                std::size_t pos = 0;
+
+                inner_loop_size = inner_loop_size - inner_stride;
+
+                // activate the init loop if we have an init function other than identity
+                if (!std::is_same<
+                        std::decay_t<typename F::init_functor_type>,
+                        typename detail::accumulator_identity<init_type>>::value)
+                {
+                    accumulator_init_with_f(xt::get<1>(f), res, axis);
+                }
+
+                pos = 0;
+                for (std::size_t i = 0; i < outer_loop_size; ++i)
+                {
+                    for (std::size_t j = 0; j < inner_loop_size; ++j)
+                    {
+                        res.storage()[pos + inner_stride] = xt::get<0>(f)(
+                            res.storage()[pos],
+                            res.storage()[pos + inner_stride]
+                        );
+
+                        pos += outer_stride;
+                    }
+                    pos += inner_stride;
+                }
+            }
+            return res;
+        }
+
+        template <class F, class E>
+        inline auto accumulator_impl(F&& f, E&& e, evaluation_strategy::immediate_type)
+        {
+            using init_type = typename F::init_value_type;
+            using expr_value_type = typename std::decay_t<E>::value_type;
+            using accumulate_functor_type = typename F::accumulate_functor_type;
+            using return_type = std::decay_t<decltype(std::declval<accumulate_functor_type>()(
+                std::declval<init_type>(),
+                std::declval<expr_value_type>()
+            ))>;
+            // using return_type = std::conditional_t<std::is_same<init_type, void>::value, typename
+            // std::decay_t<E>::value_type, init_type>;
+
+            using result_type = xaccumulator_return_type_t<std::decay_t<E>, return_type>;
+
+            std::size_t sz = e.size();
+            auto result = result_type::from_shape({sz});
+
+            if (sz != std::size_t(0))
+            {
+                auto it = e.template begin<XTENSOR_DEFAULT_TRAVERSAL>();
+                result.storage()[0] = xt::get<1>(f)(*it);
+                ++it;
+
+                for (std::size_t idx = 0; it != e.template end<XTENSOR_DEFAULT_TRAVERSAL>(); ++it)
+                {
+                    result.storage()[idx + 1] = xt::get<0>(f)(result.storage()[idx], *it);
+                    ++idx;
+                }
+            }
+            return result;
+        }
+    }
+
+    /**
+     * Accumulate and flatten array
+     * **NOTE** This function is not lazy!
+     *
+     * @param f functor to use for accumulation
+     * @param e xexpression to be accumulated
+     * @param evaluation_strategy evaluation strategy of the accumulation
+     *
+     * @return returns xarray<T> filled with accumulated values
+     */
+    template <class F, class E, class EVS = DEFAULT_STRATEGY_ACCUMULATORS, XTL_REQUIRES(is_evaluation_strategy<EVS>)>
+    inline auto accumulate(F&& f, E&& e, EVS evaluation_strategy = EVS())
+    {
+        // Note we need to check is_integral above in order to prohibit EVS = int, and not taking the
+        // std::size_t overload below!
+        return detail::accumulator_impl(std::forward<F>(f), std::forward<E>(e), evaluation_strategy);
+    }
+
+    /**
+     * Accumulate over axis
+     * **NOTE** This function is not lazy!
+     *
+     * @param f Functor to use for accumulation
+     * @param e xexpression to accumulate
+     * @param axis Axis to perform accumulation over
+     * @param evaluation_strategy evaluation strategy of the accumulation
+     *
+     * @return returns xarray<T> filled with accumulated values
+     */
+    template <class F, class E, class EVS = DEFAULT_STRATEGY_ACCUMULATORS>
+    inline auto accumulate(F&& f, E&& e, std::ptrdiff_t axis, EVS evaluation_strategy = EVS())
+    {
+        std::size_t ax = normalize_axis(e.dimension(), axis);
+        return detail::accumulator_impl(std::forward<F>(f), std::forward<E>(e), ax, evaluation_strategy);
+    }
+}
+
+#endif

+ 921 - 0
3rd/numpy/include/xtensor/xadapt.hpp

@@ -0,0 +1,921 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_ADAPT_HPP
+#define XTENSOR_ADAPT_HPP
+
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <type_traits>
+
+#include <xtl/xsequence.hpp>
+
+#include "xarray.hpp"
+#include "xbuffer_adaptor.hpp"
+#include "xfixed.hpp"
+#include "xtensor.hpp"
+
+namespace xt
+{
+    /**
+     * @defgroup xt_xadapt Adaptors of STL-like containers
+     */
+
+    namespace detail
+    {
+        template <class>
+        struct array_size_impl;
+
+        template <class T, std::size_t N>
+        struct array_size_impl<std::array<T, N>>
+        {
+            static constexpr std::size_t value = N;
+        };
+
+        template <class C>
+        using array_size = array_size_impl<std::decay_t<C>>;
+
+        template <class P>
+        struct default_allocator_for_ptr
+        {
+            using type = std::allocator<std::remove_const_t<std::remove_pointer_t<std::remove_reference_t<P>>>>;
+        };
+
+        template <class P>
+        using default_allocator_for_ptr_t = typename default_allocator_for_ptr<P>::type;
+
+        template <class T>
+        using not_an_array = xtl::negation<is_array<T>>;
+
+        template <class T>
+        using not_a_pointer = xtl::negation<std::is_pointer<T>>;
+
+        template <class T>
+        using not_a_layout = xtl::negation<std::is_same<layout_type, T>>;
+    }
+
+#ifndef IN_DOXYGEN
+
+    /**************************
+     * xarray_adaptor builder *
+     **************************/
+
+    /**
+     * Constructs an xarray_adaptor of the given stl-like container,
+     * with the specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param container the container to adapt
+     * @param shape the shape of the xarray_adaptor
+     * @param l the layout_type of the xarray_adaptor
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class C,
+        class SC,
+        XTL_REQUIRES(detail::not_an_array<std::decay_t<SC>>, detail::not_a_pointer<C>)>
+    inline xarray_adaptor<xtl::closure_type_t<C>, L, std::decay_t<SC>>
+    adapt(C&& container, const SC& shape, layout_type l = L)
+    {
+        static_assert(!xtl::is_integral<SC>::value, "shape cannot be a integer");
+        using return_type = xarray_adaptor<xtl::closure_type_t<C>, L, std::decay_t<SC>>;
+        return return_type(std::forward<C>(container), shape, l);
+    }
+
+    /**
+     * Constructs a non-owning xarray_adaptor from a pointer with the specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the container to adapt
+     * @param shape the shape of the xarray_adaptor
+     * @param l the layout_type of the xarray_adaptor
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class C,
+        class SC,
+        XTL_REQUIRES(detail::not_an_array<std::decay_t<SC>>, std::is_pointer<std::remove_reference_t<C>>)>
+    inline auto adapt(C&& pointer, const SC& shape, layout_type l = L)
+    {
+        static_assert(!xtl::is_integral<SC>::value, "shape cannot be a integer");
+        using buffer_type = xbuffer_adaptor<C, xt::no_ownership, detail::default_allocator_for_ptr_t<C>>;
+        using return_type = xarray_adaptor<buffer_type, L, std::decay_t<SC>>;
+        std::size_t size = compute_size(shape);
+        return return_type(buffer_type(pointer, size), shape, l);
+    }
+
+    /**
+     * Constructs an xarray_adaptor of the given stl-like container,
+     * with the specified shape and strides.
+     *
+     * @ingroup xt_xadapt
+     * @param container the container to adapt
+     * @param shape the shape of the xarray_adaptor
+     * @param strides the strides of the xarray_adaptor
+     */
+    template <
+        class C,
+        class SC,
+        class SS,
+        XTL_REQUIRES(detail::not_an_array<std::decay_t<SC>>, detail::not_a_layout<std::decay_t<SS>>)>
+    inline xarray_adaptor<xtl::closure_type_t<C>, layout_type::dynamic, std::decay_t<SC>>
+    adapt(C&& container, SC&& shape, SS&& strides)
+    {
+        static_assert(!xtl::is_integral<std::decay_t<SC>>::value, "shape cannot be a integer");
+        using return_type = xarray_adaptor<xtl::closure_type_t<C>, layout_type::dynamic, std::decay_t<SC>>;
+        return return_type(
+            std::forward<C>(container),
+            xtl::forward_sequence<typename return_type::inner_shape_type, SC>(shape),
+            xtl::forward_sequence<typename return_type::inner_strides_type, SS>(strides)
+        );
+    }
+
+    /**
+     * Constructs an xarray_adaptor of the given dynamically allocated C array,
+     * with the specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to the beginning of the dynamic array
+     * @param size the size of the dynamic array
+     * @param ownership indicates whether the adaptor takes ownership of the array.
+     *        Possible values are ``no_ownership()`` or ``acquire_ownership()``
+     * @param shape the shape of the xarray_adaptor
+     * @param l the layout_type of the xarray_adaptor
+     * @param alloc the allocator used for allocating / deallocating the dynamic array
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class P,
+        class O,
+        class SC,
+        class A = detail::default_allocator_for_ptr_t<P>,
+        XTL_REQUIRES(detail::not_an_array<std::decay_t<SC>>)>
+    inline xarray_adaptor<xbuffer_adaptor<xtl::closure_type_t<P>, O, A>, L, SC> adapt(
+        P&& pointer,
+        typename A::size_type size,
+        O ownership,
+        const SC& shape,
+        layout_type l = L,
+        const A& alloc = A()
+    )
+    {
+        static_assert(!xtl::is_integral<SC>::value, "shape cannot be a integer");
+        (void) ownership;
+        using buffer_type = xbuffer_adaptor<xtl::closure_type_t<P>, O, A>;
+        using return_type = xarray_adaptor<buffer_type, L, SC>;
+        buffer_type buf(std::forward<P>(pointer), size, alloc);
+        return return_type(std::move(buf), shape, l);
+    }
+
+    /**
+     * Constructs an xarray_adaptor of the given dynamically allocated C array,
+     * with the specified shape and strides.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to the beginning of the dynamic array
+     * @param size the size of the dynamic array
+     * @param ownership indicates whether the adaptor takes ownership of the array.
+     *        Possible values are ``no_ownership()`` or ``acquire_ownership()``
+     * @param shape the shape of the xarray_adaptor
+     * @param strides the strides of the xarray_adaptor
+     * @param alloc the allocator used for allocating / deallocating the dynamic array
+     */
+    template <
+        class P,
+        class O,
+        class SC,
+        class SS,
+        class A = detail::default_allocator_for_ptr_t<P>,
+        XTL_REQUIRES(detail::not_an_array<std::decay_t<SC>>, detail::not_a_layout<std::decay_t<SS>>)>
+    inline xarray_adaptor<xbuffer_adaptor<xtl::closure_type_t<P>, O, A>, layout_type::dynamic, std::decay_t<SC>>
+    adapt(P&& pointer, typename A::size_type size, O ownership, SC&& shape, SS&& strides, const A& alloc = A())
+    {
+        static_assert(!xtl::is_integral<std::decay_t<SC>>::value, "shape cannot be a integer");
+        (void) ownership;
+        using buffer_type = xbuffer_adaptor<xtl::closure_type_t<P>, O, A>;
+        using return_type = xarray_adaptor<buffer_type, layout_type::dynamic, std::decay_t<SC>>;
+        buffer_type buf(std::forward<P>(pointer), size, alloc);
+        return return_type(
+            std::move(buf),
+            xtl::forward_sequence<typename return_type::inner_shape_type, SC>(shape),
+            xtl::forward_sequence<typename return_type::inner_strides_type, SS>(strides)
+        );
+    }
+
+    /**
+     * Constructs an xarray_adaptor of the given C array allocated on the stack, with the
+     * specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param c_array the C array allocated on the stack
+     * @param shape the shape of the xarray_adaptor
+     * @param l the layout_type of the xarray_adaptor
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class T,
+        std::size_t N,
+        class SC,
+        XTL_REQUIRES(detail::not_an_array<std::decay_t<SC>>)>
+    inline auto adapt(T (&c_array)[N], const SC& shape, layout_type l = L)
+    {
+        return adapt(&c_array[0], N, xt::no_ownership(), shape, l);
+    }
+
+    /**
+     * Constructs an xarray_adaptor of the given C array allocated on the stack, with the
+     * specified shape and strides.
+     *
+     * @ingroup xt_xadapt
+     * @param c_array the C array allocated on the stack
+     * @param shape the shape of the xarray_adaptor
+     * @param strides the strides of the xarray_adaptor
+     */
+    template <
+        class T,
+        std::size_t N,
+        class SC,
+        class SS,
+        XTL_REQUIRES(detail::not_an_array<std::decay_t<SC>>, detail::not_a_layout<std::decay_t<SS>>)>
+    inline auto adapt(T (&c_array)[N], SC&& shape, SS&& strides)
+    {
+        return adapt(&c_array[0], N, xt::no_ownership(), std::forward<SC>(shape), std::forward<SS>(strides));
+    }
+
+    /***************************
+     * xtensor_adaptor builder *
+     ***************************/
+
+    /**
+     * Constructs a 1-D xtensor_adaptor of the given stl-like container,
+     * with the specified layout_type.
+     *
+     * @ingroup xt_xadapt
+     * @param container the container to adapt
+     * @param l the layout_type of the xtensor_adaptor
+     */
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class C>
+    inline xtensor_adaptor<C, 1, L> adapt(C&& container, layout_type l = L)
+    {
+        const std::array<typename std::decay_t<C>::size_type, 1> shape{container.size()};
+        using return_type = xtensor_adaptor<xtl::closure_type_t<C>, 1, L>;
+        return return_type(std::forward<C>(container), shape, l);
+    }
+
+    /**
+     * Constructs an xtensor_adaptor of the given stl-like container,
+     * with the specified shape and layout_type.
+     *
+     * @ingroup xt_xadapt
+     * @param container the container to adapt
+     * @param shape the shape of the xtensor_adaptor
+     * @param l the layout_type of the xtensor_adaptor
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class C,
+        class SC,
+        XTL_REQUIRES(detail::is_array<std::decay_t<SC>>, detail::not_a_pointer<C>)>
+    inline xtensor_adaptor<C, detail::array_size<SC>::value, L>
+    adapt(C&& container, const SC& shape, layout_type l = L)
+    {
+        static_assert(!xtl::is_integral<SC>::value, "shape cannot be a integer");
+        constexpr std::size_t N = detail::array_size<SC>::value;
+        using return_type = xtensor_adaptor<xtl::closure_type_t<C>, N, L>;
+        return return_type(std::forward<C>(container), shape, l);
+    }
+
+    /**
+     * Constructs a non-owning xtensor_adaptor from a pointer with the specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to adapt
+     * @param shape the shape of the xtensor_adaptor
+     * @param l the layout_type of the xtensor_adaptor
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class C,
+        class SC,
+        XTL_REQUIRES(detail::is_array<std::decay_t<SC>>, std::is_pointer<std::remove_reference_t<C>>)>
+    inline auto adapt(C&& pointer, const SC& shape, layout_type l = L)
+    {
+        static_assert(!xtl::is_integral<SC>::value, "shape cannot be a integer");
+        using buffer_type = xbuffer_adaptor<C, xt::no_ownership, detail::default_allocator_for_ptr_t<C>>;
+        constexpr std::size_t N = detail::array_size<SC>::value;
+        using return_type = xtensor_adaptor<buffer_type, N, L>;
+        return return_type(buffer_type(pointer, compute_size(shape)), shape, l);
+    }
+
+    /**
+     * Constructs an xtensor_adaptor of the given stl-like container,
+     * with the specified shape and strides.
+     *
+     * @ingroup xt_xadapt
+     * @param container the container to adapt
+     * @param shape the shape of the xtensor_adaptor
+     * @param strides the strides of the xtensor_adaptor
+     */
+    template <
+        class C,
+        class SC,
+        class SS,
+        XTL_REQUIRES(detail::is_array<std::decay_t<SC>>, detail::not_a_layout<std::decay_t<SS>>)>
+    inline xtensor_adaptor<C, detail::array_size<SC>::value, layout_type::dynamic>
+    adapt(C&& container, SC&& shape, SS&& strides)
+    {
+        static_assert(!xtl::is_integral<std::decay_t<SC>>::value, "shape cannot be a integer");
+        constexpr std::size_t N = detail::array_size<SC>::value;
+        using return_type = xtensor_adaptor<xtl::closure_type_t<C>, N, layout_type::dynamic>;
+        return return_type(
+            std::forward<C>(container),
+            xtl::forward_sequence<typename return_type::inner_shape_type, SC>(shape),
+            xtl::forward_sequence<typename return_type::inner_strides_type, SS>(strides)
+        );
+    }
+
+    /**
+     * Constructs a 1-D xtensor_adaptor of the given dynamically allocated C array,
+     * with the specified layout.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to the beginning of the dynamic array
+     * @param size the size of the dynamic array
+     * @param ownership indicates whether the adaptor takes ownership of the array.
+     *        Possible values are ``no_ownership()`` or ``acquire_ownership()``
+     * @param l the layout_type of the xtensor_adaptor
+     * @param alloc the allocator used for allocating / deallocating the dynamic array
+     */
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class P, class O, class A = detail::default_allocator_for_ptr_t<P>>
+    inline xtensor_adaptor<xbuffer_adaptor<xtl::closure_type_t<P>, O, A>, 1, L>
+    adapt(P&& pointer, typename A::size_type size, O ownership, layout_type l = L, const A& alloc = A())
+    {
+        (void) ownership;
+        using buffer_type = xbuffer_adaptor<xtl::closure_type_t<P>, O, A>;
+        using return_type = xtensor_adaptor<buffer_type, 1, L>;
+        buffer_type buf(std::forward<P>(pointer), size, alloc);
+        const std::array<typename A::size_type, 1> shape{size};
+        return return_type(std::move(buf), shape, l);
+    }
+
+    /**
+     * Constructs an xtensor_adaptor of the given dynamically allocated C array,
+     * with the specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to the beginning of the dynamic array
+     * @param size the size of the dynamic array
+     * @param ownership indicates whether the adaptor takes ownership of the array.
+     *        Possible values are ``no_ownership()`` or ``acquire_ownership()``
+     * @param shape the shape of the xtensor_adaptor
+     * @param l the layout_type of the xtensor_adaptor
+     * @param alloc the allocator used for allocating / deallocating the dynamic array
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class P,
+        class O,
+        class SC,
+        class A = detail::default_allocator_for_ptr_t<P>,
+        XTL_REQUIRES(detail::is_array<std::decay_t<SC>>)>
+    inline xtensor_adaptor<xbuffer_adaptor<xtl::closure_type_t<P>, O, A>, detail::array_size<SC>::value, L>
+    adapt(
+        P&& pointer,
+        typename A::size_type size,
+        O ownership,
+        const SC& shape,
+        layout_type l = L,
+        const A& alloc = A()
+    )
+    {
+        static_assert(!xtl::is_integral<SC>::value, "shape cannot be a integer");
+        (void) ownership;
+        using buffer_type = xbuffer_adaptor<xtl::closure_type_t<P>, O, A>;
+        constexpr std::size_t N = detail::array_size<SC>::value;
+        using return_type = xtensor_adaptor<buffer_type, N, L>;
+        buffer_type buf(std::forward<P>(pointer), size, alloc);
+        return return_type(std::move(buf), shape, l);
+    }
+
+    /**
+     * Constructs an xtensor_adaptor of the given dynamically allocated C array,
+     * with the specified shape and strides.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to the beginning of the dynamic array
+     * @param size the size of the dynamic array
+     * @param ownership indicates whether the adaptor takes ownership of the array.
+     *        Possible values are ``no_ownership()`` or ``acquire_ownership()``
+     * @param shape the shape of the xtensor_adaptor
+     * @param strides the strides of the xtensor_adaptor
+     * @param alloc the allocator used for allocating / deallocating the dynamic array
+     */
+    template <
+        class P,
+        class O,
+        class SC,
+        class SS,
+        class A = detail::default_allocator_for_ptr_t<P>,
+        XTL_REQUIRES(detail::is_array<std::decay_t<SC>>, detail::not_a_layout<std::decay_t<SS>>)>
+    inline xtensor_adaptor<xbuffer_adaptor<xtl::closure_type_t<P>, O, A>, detail::array_size<SC>::value, layout_type::dynamic>
+    adapt(P&& pointer, typename A::size_type size, O ownership, SC&& shape, SS&& strides, const A& alloc = A())
+    {
+        static_assert(!xtl::is_integral<std::decay_t<SC>>::value, "shape cannot be a integer");
+        (void) ownership;
+        using buffer_type = xbuffer_adaptor<xtl::closure_type_t<P>, O, A>;
+        constexpr std::size_t N = detail::array_size<SC>::value;
+        using return_type = xtensor_adaptor<buffer_type, N, layout_type::dynamic>;
+        buffer_type buf(std::forward<P>(pointer), size, alloc);
+        return return_type(
+            std::move(buf),
+            xtl::forward_sequence<typename return_type::inner_shape_type, SC>(shape),
+            xtl::forward_sequence<typename return_type::inner_strides_type, SS>(strides)
+        );
+    }
+
+    /**
+     * Constructs an xtensor_adaptor of the given C array allocated on the stack, with the
+     * specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param c_array the C array allocated on the stack
+     * @param shape the shape of the xtensor_adaptor
+     * @param l the layout_type of the xtensor_adaptor
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class T,
+        std::size_t N,
+        class SC,
+        XTL_REQUIRES(detail::is_array<std::decay_t<SC>>)>
+    inline auto adapt(T (&c_array)[N], const SC& shape, layout_type l = L)
+    {
+        return adapt(&c_array[0], N, xt::no_ownership(), shape, l);
+    }
+
+    /**
+     * Constructs an xtensor_adaptor of the given C array allocated on the stack, with the
+     * specified shape and strides.
+     *
+     * @ingroup xt_xadapt
+     * @param c_array the C array allocated on the stack
+     * @param shape the shape of the xtensor_adaptor
+     * @param strides the strides of the xtensor_adaptor
+     */
+    template <
+        class T,
+        std::size_t N,
+        class SC,
+        class SS,
+        XTL_REQUIRES(detail::is_array<std::decay_t<SC>>, detail::not_a_layout<std::decay_t<SS>>)>
+    inline auto adapt(T (&c_array)[N], SC&& shape, SS&& strides)
+    {
+        return adapt(&c_array[0], N, xt::no_ownership(), std::forward<SC>(shape), std::forward<SS>(strides));
+    }
+
+    /**
+     * Constructs a non-owning xtensor_fixed_adaptor from a pointer with the
+     * specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to adapt
+     * @param shape the shape of the xtensor_fixed_adaptor
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class C,
+        std::size_t... X,
+        XTL_REQUIRES(std::is_pointer<std::remove_reference_t<C>>)>
+    inline auto adapt(C&& pointer, const fixed_shape<X...>& /*shape*/)
+    {
+        using buffer_type = xbuffer_adaptor<C, xt::no_ownership, detail::default_allocator_for_ptr_t<C>>;
+        using return_type = xfixed_adaptor<buffer_type, fixed_shape<X...>, L>;
+        return return_type(buffer_type(pointer, detail::fixed_compute_size<fixed_shape<X...>>::value));
+    }
+
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class C, class T, std::size_t N>
+    inline auto adapt(C&& ptr, const T (&shape)[N])
+    {
+        using shape_type = std::array<std::size_t, N>;
+        return adapt(std::forward<C>(ptr), xtl::forward_sequence<shape_type, decltype(shape)>(shape));
+    }
+
+#else  // IN_DOXYGEN
+
+    /**
+     * Constructs:
+     * - an xarray_adaptor if SC is not an array type
+     * - an xtensor_adaptor if SC is an array type
+     *
+     * from the given stl-like container or pointer, with the specified shape and layout.
+     * If the adaptor is built from a pointer, it does not take its ownership.
+     *
+     * @ingroup xt_xadapt
+     * @param container the container or pointer to adapt
+     * @param shape the shape of the adaptor
+     * @param l the layout_type of the adaptor
+     */
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class C, class SC>
+    inline auto adapt(C&& container, const SC& shape, layout_type l = L);
+
+    /**
+     * Constructs:
+     * - an xarray_adaptor if SC is not an array type
+     * - an xtensor_adaptor if SC is an array type
+     *
+     * from the given stl-like container with the specified shape and strides.
+     *
+     * @ingroup xt_xadapt
+     * @param container the container to adapt
+     * @param shape the shape of the adaptor
+     * @param strides the strides of the adaptor
+     */
+    template <class C, class SC, class SS>
+    inline auto adapt(C&& container, SC&& shape, SS&& strides);
+
+    /**
+     * Constructs:
+     * - an xarray_adaptor if SC is not an array type
+     * - an xtensor_adaptor if SC is an array type
+     *
+     * of the given dynamically allocated C array, with the specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to the beginning of the dynamic array
+     * @param size the size of the dynamic array
+     * @param ownership indicates whether the adaptor takes ownership of the array.
+     *        Possible values are ``no_ownership()`` or ``acquire_ownership()``
+     * @param shape the shape of the adaptor
+     * @param l the layout_type of the adaptor
+     * @param alloc the allocator used for allocating / deallocating the dynamic array
+     */
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class P, class O, class SC, class A = detail::default_allocator_for_ptr_t<P>>
+    inline auto adapt(
+        P&& pointer,
+        typename A::size_type size,
+        O ownership,
+        const SC& shape,
+        layout_type l = L,
+        const A& alloc = A()
+    );
+
+    /**
+     * Constructs:
+     * - an xarray_adaptor if SC is not an array type
+     * - an xtensor_adaptor if SC is an array type
+     *
+     * of the given dynamically allocated C array, with the specified shape and strides.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to the beginning of the dynamic array
+     * @param size the size of the dynamic array
+     * @param ownership indicates whether the adaptor takes ownership of the array.
+     *        Possible values are ``no_ownership()`` or ``acquire_ownership()``
+     * @param shape the shape of the adaptor
+     * @param strides the strides of the adaptor
+     * @param alloc the allocator used for allocating / deallocating the dynamic array
+     */
+    template <class P, class O, class SC, class SS, class A = detail::default_allocator_for_ptr_t<P>>
+    inline auto
+    adapt(P&& pointer, typename A::size_type size, O ownership, SC&& shape, SS&& strides, const A& alloc = A());
+
+    /**
+     * Constructs:
+     * - an xarray_adaptor if SC is not an array type
+     * - an xtensor_adaptor if SC is an array type
+     *
+     * of the given C array allocated on the stack, with the specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param c_array the C array allocated on the stack
+     * @param shape the shape of the adaptor
+     * @param l the layout_type of the adaptor
+     */
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class T, std::size_t N, class SC>
+    inline auto adapt(T (&c_array)[N], const SC& shape, layout_type l = L);
+
+    /**
+     * Constructs:
+     * - an xarray_adaptor if SC is not an array type
+     * - an xtensor_adaptor if SC is an array type
+     *
+     * of the given C array allocated on the stack, with the
+     * specified shape and strides.
+     *
+     * @ingroup xt_xadapt
+     * @param c_array the C array allocated on the stack
+     * @param shape the shape of the adaptor
+     * @param strides the strides of the adaptor
+     */
+    template <class T, std::size_t N, class SC, class SS>
+    inline auto adapt(T (&c_array)[N], SC&& shape, SS&& strides);
+
+    /**
+     * Constructs a non-owning xtensor_fixed_adaptor from a pointer with the
+     * specified shape and layout.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to adapt
+     * @param shape the shape of the xtensor_fixed_adaptor
+     */
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class C, std::size_t... X>
+    inline auto adapt(C&& pointer, const fixed_shape<X...>& /*shape*/);
+
+    /**
+     * Constructs a 1-D xtensor_adaptor of the given stl-like container,
+     * with the specified layout_type.
+     *
+     * @ingroup xt_xadapt
+     * @param container the container to adapt
+     * @param l the layout_type of the xtensor_adaptor
+     */
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class C>
+    inline xtensor_adaptor<C, 1, L> adapt(C&& container, layout_type l = L);
+
+    /**
+     * Constructs a 1-D xtensor_adaptor of the given dynamically allocated C array,
+     * with the specified layout.
+     *
+     * @ingroup xt_xadapt
+     * @param pointer the pointer to the beginning of the dynamic array
+     * @param size the size of the dynamic array
+     * @param ownership indicates whether the adaptor takes ownership of the array.
+     *        Possible values are ``no_ownership()`` or ``acquire_ownership()``
+     * @param l the layout_type of the xtensor_adaptor
+     * @param alloc the allocator used for allocating / deallocating the dynamic array
+     */
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class P, class O, class A = detail::default_allocator_for_ptr_t<P>>
+    inline xtensor_adaptor<xbuffer_adaptor<xtl::closure_type_t<P>, O, A>, 1, L>
+    adapt(P&& pointer, typename A::size_type size, O ownership, layout_type l = L, const A& alloc = A());
+
+#endif  // IN_DOXYGEN
+
+    /*****************************
+     * smart_ptr adapter builder *
+     *****************************/
+
+    /**
+     * Adapt a smart pointer to a typed memory block (unique_ptr or shared_ptr)
+     *
+     * @code{.cpp}
+     * #include <xtensor/xadapt.hpp>
+     * #include <xtensor/xio.hpp>
+     *
+     * std::shared_ptr<double> sptr(new double[8], std::default_delete<double[]>());
+     * sptr.get()[2] = 321.;
+     * std::vector<size_t> shape = {4, 2};
+     * auto xptr = adapt_smart_ptr(sptr, shape);
+     * xptr(1, 3) = 123.;
+     * std::cout << xptr;
+     * @endcode
+     *
+     * @ingroup xt_xadapt
+     * @param smart_ptr a smart pointer to a memory block of T[]
+     * @param shape The desired shape
+     * @param l The desired memory layout
+     *
+     * @return xarray_adaptor for memory
+     */
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class P, class SC, XTL_REQUIRES(detail::not_an_array<std::decay_t<SC>>)>
+    auto adapt_smart_ptr(P&& smart_ptr, const SC& shape, layout_type l = L)
+    {
+        using buffer_adaptor = xbuffer_adaptor<decltype(smart_ptr.get()), smart_ownership, std::decay_t<P>>;
+        return xarray_adaptor<buffer_adaptor, L, std::decay_t<SC>>(
+            buffer_adaptor(smart_ptr.get(), compute_size(shape), std::forward<P>(smart_ptr)),
+            shape,
+            l
+        );
+    }
+
+    /**
+     * Adapt a smart pointer (shared_ptr or unique_ptr)
+     *
+     * This function allows to automatically adapt a shared or unique pointer to
+     * a given shape and operate naturally on it. Memory will be automatically
+     * handled by the smart pointer implementation.
+     *
+     * @code{.cpp}
+     * #include <xtensor/xadapt.hpp>
+     * #include <xtensor/xio.hpp>
+     *
+     * struct Buffer {
+     *     Buffer(std::vector<double>& buf) : m_buf(buf) {}
+     *     ~Buffer() { std::cout << "deleted" << std::endl; }
+     *     std::vector<double> m_buf;
+     * };
+     *
+     * auto data = std::vector<double>{1,2,3,4,5,6,7,8};
+     * auto shared_buf = std::make_shared<Buffer>(data);
+     * auto unique_buf = std::make_unique<Buffer>(data);
+     *
+     * std::cout << shared_buf.use_count() << std::endl;
+     * {
+     *     std::vector<size_t> shape = {2, 4};
+     *     auto obj = adapt_smart_ptr(shared_buf.get()->m_buf.data(),
+     *                                shape, shared_buf);
+     *     // Use count increased to 2
+     *     std::cout << shared_buf.use_count() << std::endl;
+     *     std::cout << obj << std::endl;
+     * }
+     * // Use count reset to 1
+     * std::cout << shared_buf.use_count() << std::endl;
+     *
+     * {
+     *     std::vector<size_t> shape = {2, 4};
+     *     auto obj = adapt_smart_ptr(unique_buf.get()->m_buf.data(),
+     *                                shape, std::move(unique_buf));
+     *     std::cout << obj << std::endl;
+     * }
+     * @endcode
+     *
+     * @ingroup xt_xadapt
+     * @param data_ptr A pointer to a typed data block (e.g. double*)
+     * @param shape The desired shape
+     * @param smart_ptr A smart pointer to move or copy, in order to manage memory
+     * @param l The desired memory layout
+     *
+     * @return xarray_adaptor on the memory
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class P,
+        class SC,
+        class D,
+        XTL_REQUIRES(detail::not_an_array<std::decay_t<SC>>, detail::not_a_layout<std::decay_t<D>>)>
+    auto adapt_smart_ptr(P&& data_ptr, const SC& shape, D&& smart_ptr, layout_type l = L)
+    {
+        using buffer_adaptor = xbuffer_adaptor<P, smart_ownership, std::decay_t<D>>;
+
+        return xarray_adaptor<buffer_adaptor, L, std::decay_t<SC>>(
+            buffer_adaptor(data_ptr, compute_size(shape), std::forward<D>(smart_ptr)),
+            shape,
+            l
+        );
+    }
+
+    /**
+     * Adapt a smart pointer to a typed memory block (unique_ptr or shared_ptr)
+     *
+     * @code{.cpp}
+     * #include <xtensor/xadapt.hpp>
+     * #include <xtensor/xio.hpp>
+     *
+     * std::shared_ptr<double> sptr(new double[8], std::default_delete<double[]>());
+     * sptr.get()[2] = 321.;
+     * auto xptr = adapt_smart_ptr(sptr, {4, 2});
+     * xptr(1, 3) = 123.;
+     * std::cout << xptr;
+     * @endcode
+     *
+     * @ingroup xt_xadapt
+     * @param smart_ptr a smart pointer to a memory block of T[]
+     * @param shape The desired shape
+     * @param l The desired memory layout
+     *
+     * @return xtensor_adaptor for memory
+     */
+    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class P, class I, std::size_t N>
+    auto adapt_smart_ptr(P&& smart_ptr, const I (&shape)[N], layout_type l = L)
+    {
+        using buffer_adaptor = xbuffer_adaptor<decltype(smart_ptr.get()), smart_ownership, std::decay_t<P>>;
+        std::array<std::size_t, N> fshape = xtl::forward_sequence<std::array<std::size_t, N>, decltype(shape)>(
+            shape
+        );
+        return xtensor_adaptor<buffer_adaptor, N, L>(
+            buffer_adaptor(smart_ptr.get(), compute_size(fshape), std::forward<P>(smart_ptr)),
+            std::move(fshape),
+            l
+        );
+    }
+
+    /**
+     * Adapt a smart pointer (shared_ptr or unique_ptr)
+     *
+     * This function allows to automatically adapt a shared or unique pointer to
+     * a given shape and operate naturally on it. Memory will be automatically
+     * handled by the smart pointer implementation.
+     *
+     * @code{.cpp}
+     * #include <xtensor/xadapt.hpp>
+     * #include <xtensor/xio.hpp>
+     *
+     * struct Buffer {
+     *     Buffer(std::vector<double>& buf) : m_buf(buf) {}
+     *     ~Buffer() { std::cout << "deleted" << std::endl; }
+     *     std::vector<double> m_buf;
+     * };
+     *
+     * auto data = std::vector<double>{1,2,3,4,5,6,7,8};
+     * auto shared_buf = std::make_shared<Buffer>(data);
+     * auto unique_buf = std::make_unique<Buffer>(data);
+     *
+     * std::cout << shared_buf.use_count() << std::endl;
+     * {
+     *     auto obj = adapt_smart_ptr(shared_buf.get()->m_buf.data(),
+     *                                {2, 4}, shared_buf);
+     *     // Use count increased to 2
+     *     std::cout << shared_buf.use_count() << std::endl;
+     *     std::cout << obj << std::endl;
+     * }
+     * // Use count reset to 1
+     * std::cout << shared_buf.use_count() << std::endl;
+     *
+     * {
+     *     auto obj = adapt_smart_ptr(unique_buf.get()->m_buf.data(),
+     *                                {2, 4}, std::move(unique_buf));
+     *     std::cout << obj << std::endl;
+     * }
+     * @endcode
+     *
+     * @ingroup xt_xadapt
+     * @param data_ptr A pointer to a typed data block (e.g. double*)
+     * @param shape The desired shape
+     * @param smart_ptr A smart pointer to move or copy, in order to manage memory
+     * @param l The desired memory layout
+     *
+     * @return xtensor_adaptor on the memory
+     */
+    template <
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class P,
+        class I,
+        std::size_t N,
+        class D,
+        XTL_REQUIRES(detail::not_a_layout<std::decay_t<D>>)>
+    auto adapt_smart_ptr(P&& data_ptr, const I (&shape)[N], D&& smart_ptr, layout_type l = L)
+    {
+        using buffer_adaptor = xbuffer_adaptor<P, smart_ownership, std::decay_t<D>>;
+        std::array<std::size_t, N> fshape = xtl::forward_sequence<std::array<std::size_t, N>, decltype(shape)>(
+            shape
+        );
+
+        return xtensor_adaptor<buffer_adaptor, N, L>(
+            buffer_adaptor(data_ptr, compute_size(fshape), std::forward<D>(smart_ptr)),
+            std::move(fshape),
+            l
+        );
+    }
+
+    /**
+     * @brief xtensor adaptor for a pointer.
+     *
+     * Construct for example with:
+     *
+     * @code{.cpp}
+     * #include <xtensor/xadapt.hpp>
+     *
+     * std::array<size_t, 2> shape = {2, 2};
+     * std::vector<double> data = {1, 2, 3, 4};
+     *
+     * xt::xtensor_pointer<double, 2> a = xt::adapt(data.data(), 4, xt::no_ownership(), shape);
+     * @endcode
+     *
+     * @ingroup xt_xadapt
+     * @tparam T The data type (e.g. ``double``).
+     * @tparam N The number of dimensions.
+     * @tparam L The xt::layout_type() of the xtensor.
+     */
+    template <class T, std::size_t N, layout_type L = XTENSOR_DEFAULT_LAYOUT>
+    using xtensor_pointer = xtensor_adaptor<
+        xbuffer_adaptor<xtl::closure_type_t<T*>, xt::no_ownership, detail::default_allocator_for_ptr_t<T>>,
+        N,
+        L>;
+
+    /**
+     * @brief xarray adaptor for a pointer.
+     *
+     * Construct for example with:
+     *
+     * @code{.cpp}
+     * #include <xtensor/xadapt.hpp>
+     *
+     * std::vector<int> data(4, 0);
+     * xt::svector<size_t> shape({2, 2});
+     *
+     * xt::xarray_pointer<int> a = xt::adapt(data.data(), data.size(), xt::no_ownership(), shape);
+     * @endcode
+     *
+     * @ingroup xt_xadapt
+     * @tparam T The data type (e.g. ``double``).
+     * @tparam L The xt::layout_type() of the xarray.
+     * @tparam SC The shape container type (e.g. ``xt::svector<size_t>``). Default matches
+     *      xt::adapt(P&&, typename A::size_type, O, const SC&, layout_type, const A& alloc)
+     */
+    template <
+        class T,
+        layout_type L = XTENSOR_DEFAULT_LAYOUT,
+        class SC = XTENSOR_DEFAULT_SHAPE_CONTAINER(T, std::allocator<std::size_t>, std::allocator<std::size_t>)>
+    using xarray_pointer = xarray_adaptor<
+        xbuffer_adaptor<xtl::closure_type_t<T*>, xt::no_ownership, detail::default_allocator_for_ptr_t<T>>,
+        L,
+        SC>;
+}
+
+#endif

+ 667 - 0
3rd/numpy/include/xtensor/xarray.hpp

@@ -0,0 +1,667 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_ARRAY_HPP
+#define XTENSOR_ARRAY_HPP
+
+#include <algorithm>
+#include <initializer_list>
+#include <utility>
+
+#include <xtl/xsequence.hpp>
+
+#include "xbuffer_adaptor.hpp"
+#include "xcontainer.hpp"
+#include "xsemantic.hpp"
+
+namespace xt
+{
+
+    /********************************
+     * xarray_container declaration *
+     ********************************/
+
+    namespace extension
+    {
+        // Extension point: selects the extra base class mixed into
+        // xarray_container for a given expression tag. Only declared here;
+        // other tags (e.g. optional expressions) specialize it elsewhere.
+        template <class EC, layout_type L, class SC, class Tag>
+        struct xarray_container_base;
+
+        // Plain tensor expressions need no extra API: inject an empty base.
+        template <class EC, layout_type L, class SC>
+        struct xarray_container_base<EC, L, SC, xtensor_expression_tag>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class EC, layout_type L, class SC, class Tag>
+        using xarray_container_base_t = typename xarray_container_base<EC, L, SC, Tag>::type;
+    }
+
+    // Trait bundle consumed by the xcontainer / xstrided_container CRTP bases:
+    // maps the element container EC and the shape container SC to all the
+    // types the base classes need.
+    template <class EC, layout_type L, class SC, class Tag>
+    struct xcontainer_inner_types<xarray_container<EC, L, SC, Tag>>
+    {
+        using storage_type = EC;
+        using reference = inner_reference_t<storage_type>;
+        using const_reference = typename storage_type::const_reference;
+        using size_type = typename storage_type::size_type;
+        using shape_type = SC;
+        // Strides and backstrides are stored in a signed counterpart of the
+        // shape container (see get_strides_t).
+        using strides_type = get_strides_t<shape_type>;
+        using backstrides_type = get_strides_t<shape_type>;
+        using inner_shape_type = shape_type;
+        using inner_strides_type = strides_type;
+        using inner_backstrides_type = backstrides_type;
+        // A container owns its data, so it can serve as its own temporary
+        // during assignment.
+        using temporary_type = xarray_container<EC, L, SC, Tag>;
+        static constexpr layout_type layout = L;
+    };
+
+    // Iterator-related types are the generic ones for contiguous containers.
+    template <class EC, layout_type L, class SC, class Tag>
+    struct xiterable_inner_types<xarray_container<EC, L, SC, Tag>>
+        : xcontainer_iterable_types<xarray_container<EC, L, SC, Tag>>
+    {
+    };
+
+    /**
+     * @class xarray_container
+     * @brief Dense multidimensional container with tensor semantic.
+     *
+     * The xarray_container class implements a dense multidimensional container
+     * with tensor semantic.
+     *
+     * @tparam EC The type of the container holding the elements.
+     * @tparam L The layout_type of the container.
+     * @tparam SC The type of the containers holding the shape and the strides.
+     * @tparam Tag The expression tag.
+     * @sa xarray, xstrided_container, xcontainer
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    class xarray_container : public xstrided_container<xarray_container<EC, L, SC, Tag>>,
+                             public xcontainer_semantic<xarray_container<EC, L, SC, Tag>>,
+                             public extension::xarray_container_base_t<EC, L, SC, Tag>
+    {
+    public:
+
+        using self_type = xarray_container<EC, L, SC, Tag>;
+        using base_type = xstrided_container<self_type>;
+        using semantic_base = xcontainer_semantic<self_type>;
+        using extension_base = extension::xarray_container_base_t<EC, L, SC, Tag>;
+        using storage_type = typename base_type::storage_type;
+        using allocator_type = typename base_type::allocator_type;
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using const_reference = typename base_type::const_reference;
+        using pointer = typename base_type::pointer;
+        using const_pointer = typename base_type::const_pointer;
+        using shape_type = typename base_type::shape_type;
+        using inner_shape_type = typename base_type::inner_shape_type;
+        using strides_type = typename base_type::strides_type;
+        using backstrides_type = typename base_type::backstrides_type;
+        using inner_strides_type = typename base_type::inner_strides_type;
+        using inner_backstrides_type = typename base_type::inner_backstrides_type;
+        using temporary_type = typename semantic_base::temporary_type;
+        using expression_tag = Tag;
+        static constexpr std::size_t rank = SIZE_MAX;
+
+        xarray_container();
+        explicit xarray_container(const shape_type& shape, layout_type l = L);
+        explicit xarray_container(const shape_type& shape, const_reference value, layout_type l = L);
+        explicit xarray_container(const shape_type& shape, const strides_type& strides);
+        explicit xarray_container(const shape_type& shape, const strides_type& strides, const_reference value);
+        explicit xarray_container(storage_type&& storage, inner_shape_type&& shape, inner_strides_type&& strides);
+
+        xarray_container(const value_type& t);
+        xarray_container(nested_initializer_list_t<value_type, 1> t);
+        xarray_container(nested_initializer_list_t<value_type, 2> t);
+        xarray_container(nested_initializer_list_t<value_type, 3> t);
+        xarray_container(nested_initializer_list_t<value_type, 4> t);
+        xarray_container(nested_initializer_list_t<value_type, 5> t);
+
+        template <class S = shape_type>
+        static xarray_container from_shape(S&& s);
+
+        ~xarray_container() = default;
+
+        xarray_container(const xarray_container&) = default;
+        xarray_container& operator=(const xarray_container&) = default;
+
+        xarray_container(xarray_container&&) = default;
+        xarray_container& operator=(xarray_container&&) = default;
+
+        template <std::size_t N>
+        explicit xarray_container(xtensor_container<EC, N, L, Tag>&& rhs);
+        template <std::size_t N>
+        xarray_container& operator=(xtensor_container<EC, N, L, Tag>&& rhs);
+
+        template <class E>
+        xarray_container(const xexpression<E>& e);
+
+        template <class E>
+        xarray_container& operator=(const xexpression<E>& e);
+
+    private:
+
+        storage_type m_storage;
+
+        storage_type& storage_impl() noexcept;
+        const storage_type& storage_impl() const noexcept;
+
+        friend class xcontainer<xarray_container<EC, L, SC, Tag>>;
+    };
+
+    /******************************
+     * xarray_adaptor declaration *
+     ******************************/
+
+    namespace extension
+    {
+        // Extension point mirroring xarray_container_base, for the adaptor.
+        template <class EC, layout_type L, class SC, class Tag>
+        struct xarray_adaptor_base;
+
+        // Plain tensor expressions need no extra API: inject an empty base.
+        template <class EC, layout_type L, class SC>
+        struct xarray_adaptor_base<EC, L, SC, xtensor_expression_tag>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class EC, layout_type L, class SC, class Tag>
+        using xarray_adaptor_base_t = typename xarray_adaptor_base<EC, L, SC, Tag>::type;
+    }
+
+    template <class EC, layout_type L, class SC, class Tag>
+    struct xcontainer_inner_types<xarray_adaptor<EC, L, SC, Tag>>
+    {
+        // EC is a closure type and may be a reference; the storage type is
+        // the underlying container.
+        using storage_type = std::remove_reference_t<EC>;
+        using reference = inner_reference_t<storage_type>;
+        using const_reference = typename storage_type::const_reference;
+        using size_type = typename storage_type::size_type;
+        using shape_type = SC;
+        using strides_type = get_strides_t<shape_type>;
+        using backstrides_type = get_strides_t<shape_type>;
+        using inner_shape_type = shape_type;
+        using inner_strides_type = strides_type;
+        using inner_backstrides_type = backstrides_type;
+        // An adaptor does not own its memory, so assignment temporaries are
+        // owning xarray_containers over a compatible container type.
+        using temporary_type = xarray_container<temporary_container_t<storage_type>, L, SC, Tag>;
+        static constexpr layout_type layout = L;
+    };
+
+    template <class EC, layout_type L, class SC, class Tag>
+    struct xiterable_inner_types<xarray_adaptor<EC, L, SC, Tag>>
+        : xcontainer_iterable_types<xarray_adaptor<EC, L, SC, Tag>>
+    {
+    };
+
+    /**
+     * @class xarray_adaptor
+     * @brief Dense multidimensional container adaptor with
+     * tensor semantic.
+     *
+     * The xarray_adaptor class implements a dense multidimensional
+     * container adaptor with tensor semantic. It is used to provide
+     * a multidimensional container semantic and a tensor semantic to
+     * stl-like containers.
+     *
+     * @tparam EC The closure for the container type to adapt.
+     * @tparam L The layout_type of the adaptor.
+     * @tparam SC The type of the containers holding the shape and the strides.
+     * @tparam Tag The expression tag.
+     * @sa xstrided_container, xcontainer
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    class xarray_adaptor : public xstrided_container<xarray_adaptor<EC, L, SC, Tag>>,
+                           public xcontainer_semantic<xarray_adaptor<EC, L, SC, Tag>>,
+                           public extension::xarray_adaptor_base_t<EC, L, SC, Tag>
+    {
+    public:
+
+        using container_closure_type = EC;
+
+        using self_type = xarray_adaptor<EC, L, SC, Tag>;
+        using base_type = xstrided_container<self_type>;
+        using semantic_base = xcontainer_semantic<self_type>;
+        using extension_base = extension::xarray_adaptor_base_t<EC, L, SC, Tag>;
+        using storage_type = typename base_type::storage_type;
+        using allocator_type = typename base_type::allocator_type;
+        using shape_type = typename base_type::shape_type;
+        using strides_type = typename base_type::strides_type;
+        using backstrides_type = typename base_type::backstrides_type;
+        using temporary_type = typename semantic_base::temporary_type;
+        using expression_tag = Tag;
+        static constexpr std::size_t rank = SIZE_MAX;
+
+        xarray_adaptor(storage_type&& storage);
+        xarray_adaptor(const storage_type& storage);
+
+        template <class D>
+        xarray_adaptor(D&& storage, const shape_type& shape, layout_type l = L);
+
+        template <class D>
+        xarray_adaptor(D&& storage, const shape_type& shape, const strides_type& strides);
+
+        ~xarray_adaptor() = default;
+
+        xarray_adaptor(const xarray_adaptor&) = default;
+        xarray_adaptor& operator=(const xarray_adaptor&);
+
+        xarray_adaptor(xarray_adaptor&&) = default;
+        xarray_adaptor& operator=(xarray_adaptor&&);
+        xarray_adaptor& operator=(temporary_type&&);
+
+        template <class E>
+        xarray_adaptor& operator=(const xexpression<E>& e);
+
+        template <class P, class S>
+        void reset_buffer(P&& pointer, S&& size);
+
+    private:
+
+        container_closure_type m_storage;
+
+        storage_type& storage_impl() noexcept;
+        const storage_type& storage_impl() const noexcept;
+
+        friend class xcontainer<xarray_adaptor<EC, L, SC, Tag>>;
+    };
+
+    /***********************************
+     * xarray_container implementation *
+     ***********************************/
+
+    /**
+     * @name Constructors
+     */
+    //@{
+    /**
+     * Allocates an uninitialized xarray_container that holds 0 element.
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container()
+        : base_type()
+        , m_storage(1, value_type())
+    {
+    }
+
+    /**
+     * Allocates an uninitialized xarray_container with the specified shape and
+     * layout_type.
+     * @param shape the shape of the xarray_container
+     * @param l the layout_type of the xarray_container
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(const shape_type& shape, layout_type l)
+        : base_type()
+    {
+        base_type::resize(shape, l);
+    }
+
+    /**
+     * Allocates an xarray_container with the specified shape and layout_type. Elements
+     * are initialized to the specified value.
+     * @param shape the shape of the xarray_container
+     * @param value the value of the elements
+     * @param l the layout_type of the xarray_container
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(
+        const shape_type& shape,
+        const_reference value,
+        layout_type l
+    )
+        : base_type()
+    {
+        base_type::resize(shape, l);
+        std::fill(m_storage.begin(), m_storage.end(), value);
+    }
+
+    /**
+     * Allocates an uninitialized xarray_container with the specified shape and strides.
+     * @param shape the shape of the xarray_container
+     * @param strides the strides of the xarray_container
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(const shape_type& shape, const strides_type& strides)
+        : base_type()
+    {
+        base_type::resize(shape, strides);
+    }
+
+    /**
+     * Allocates an uninitialized xarray_container with the specified shape and strides.
+     * Elements are initialized to the specified value.
+     * @param shape the shape of the xarray_container
+     * @param strides the strides of the xarray_container
+     * @param value the value of the elements
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(
+        const shape_type& shape,
+        const strides_type& strides,
+        const_reference value
+    )
+        : base_type()
+    {
+        base_type::resize(shape, strides);
+        std::fill(m_storage.begin(), m_storage.end(), value);
+    }
+
+    /**
+     * Allocates an xarray_container that holds a single element initialized to the
+     * specified value.
+     * @param t the value of the element
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(const value_type& t)
+        : base_type()
+    {
+        base_type::resize(xt::shape<shape_type>(t), true);
+        nested_copy(m_storage.begin(), t);
+    }
+
+    /**
+     * Allocates an xarray_container by moving specified data, shape and strides
+     *
+     * @param storage the data for the xarray_container
+     * @param shape the shape of the xarray_container
+     * @param strides the strides of the xarray_container
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(
+        storage_type&& storage,
+        inner_shape_type&& shape,
+        inner_strides_type&& strides
+    )
+        : base_type(std::move(shape), std::move(strides))
+        , m_storage(std::move(storage))
+    {
+    }
+
+    //@}
+
+    /**
+     * @name Constructors from initializer list
+     */
+    //@{
+    /**
+     * Allocates a one-dimensional xarray_container.
+     * @param t the elements of the xarray_container
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(nested_initializer_list_t<value_type, 1> t)
+        : base_type()
+    {
+        base_type::resize(xt::shape<shape_type>(t));
+        constexpr auto tmp = layout_type::row_major;
+        L == tmp ? nested_copy(m_storage.begin(), t) : nested_copy(this->template begin<tmp>(), t);
+    }
+
+    /**
+     * Allocates a two-dimensional xarray_container.
+     * @param t the elements of the xarray_container
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(nested_initializer_list_t<value_type, 2> t)
+        : base_type()
+    {
+        base_type::resize(xt::shape<shape_type>(t));
+        constexpr auto tmp = layout_type::row_major;
+        L == tmp ? nested_copy(m_storage.begin(), t) : nested_copy(this->template begin<tmp>(), t);
+    }
+
+    /**
+     * Allocates a three-dimensional xarray_container.
+     * @param t the elements of the xarray_container
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(nested_initializer_list_t<value_type, 3> t)
+        : base_type()
+    {
+        base_type::resize(xt::shape<shape_type>(t));
+        constexpr auto tmp = layout_type::row_major;
+        L == tmp ? nested_copy(m_storage.begin(), t) : nested_copy(this->template begin<tmp>(), t);
+    }
+
+    /**
+     * Allocates a four-dimensional xarray_container.
+     * @param t the elements of the xarray_container
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(nested_initializer_list_t<value_type, 4> t)
+        : base_type()
+    {
+        base_type::resize(xt::shape<shape_type>(t));
+        constexpr auto tmp = layout_type::row_major;
+        L == tmp ? nested_copy(m_storage.begin(), t) : nested_copy(this->template begin<tmp>(), t);
+    }
+
+    /**
+     * Allocates a five-dimensional xarray_container.
+     * @param t the elements of the xarray_container
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(nested_initializer_list_t<value_type, 5> t)
+        : base_type()
+    {
+        base_type::resize(xt::shape<shape_type>(t));
+        constexpr auto tmp = layout_type::row_major;
+        L == tmp ? nested_copy(m_storage.begin(), t) : nested_copy(this->template begin<tmp>(), t);
+    }
+
+    //@}
+
+    /**
+     * Allocates and returns an xarray_container with the specified shape.
+     * @param s the shape of the xarray_container
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    template <class S>
+    inline xarray_container<EC, L, SC, Tag> xarray_container<EC, L, SC, Tag>::from_shape(S&& s)
+    {
+        shape_type shape = xtl::forward_sequence<shape_type, S>(s);
+        return self_type(shape);
+    }
+
+    // Converting constructor from a static-rank xtensor_container with the
+    // same element container: the dynamic shape/strides/backstrides are
+    // copied element-wise from the tensor's fixed-size ones, and the element
+    // storage is moved (no element copies).
+    template <class EC, layout_type L, class SC, class Tag>
+    template <std::size_t N>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(xtensor_container<EC, N, L, Tag>&& rhs)
+        : base_type(
+            inner_shape_type(rhs.shape().cbegin(), rhs.shape().cend()),
+            inner_strides_type(rhs.strides().cbegin(), rhs.strides().cend()),
+            inner_backstrides_type(rhs.backstrides().cbegin(), rhs.backstrides().cend()),
+            // NOTE(review): layout() presumably returns by value, making this
+            // move a no-op cast — confirm against xcontainer.
+            std::move(rhs.layout())
+        )
+        , m_storage(std::move(rhs.storage()))
+    {
+    }
+
+    // Converting move assignment from a static-rank xtensor_container;
+    // same strategy as the converting constructor above.
+    template <class EC, layout_type L, class SC, class Tag>
+    template <std::size_t N>
+    inline xarray_container<EC, L, SC, Tag>&
+    xarray_container<EC, L, SC, Tag>::operator=(xtensor_container<EC, N, L, Tag>&& rhs)
+    {
+        this->shape_impl().assign(rhs.shape().cbegin(), rhs.shape().cend());
+        this->strides_impl().assign(rhs.strides().cbegin(), rhs.strides().cend());
+        this->backstrides_impl().assign(rhs.backstrides().cbegin(), rhs.backstrides().cend());
+        this->mutable_layout() = rhs.layout();
+        m_storage = std::move(rhs.storage());
+        return *this;
+    }
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended copy constructor.
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    template <class E>
+    inline xarray_container<EC, L, SC, Tag>::xarray_container(const xexpression<E>& e)
+        : base_type()
+    {
+        // Avoids unintialized data because of (m_shape == shape) condition
+        // in resize (called by assign), which is always true when dimension == 0.
+        if (e.derived_cast().dimension() == 0)
+        {
+            detail::resize_data_container(m_storage, std::size_t(1));
+        }
+        semantic_base::assign(e);
+    }
+
+    /**
+     * The extended assignment operator.
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    template <class E>
+    inline auto xarray_container<EC, L, SC, Tag>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        return semantic_base::operator=(e);
+    }
+
+    //@}
+
+    template <class EC, layout_type L, class SC, class Tag>
+    inline auto xarray_container<EC, L, SC, Tag>::storage_impl() noexcept -> storage_type&
+    {
+        return m_storage;
+    }
+
+    template <class EC, layout_type L, class SC, class Tag>
+    inline auto xarray_container<EC, L, SC, Tag>::storage_impl() const noexcept -> const storage_type&
+    {
+        return m_storage;
+    }
+
+    /******************
+     * xarray_adaptor *
+     ******************/
+
+    /**
+     * @name Constructors
+     */
+    //@{
+    /**
+     * Constructs an xarray_adaptor of the given stl-like container.
+     * @param storage the container to adapt (moved into the adaptor)
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_adaptor<EC, L, SC, Tag>::xarray_adaptor(storage_type&& storage)
+        : base_type()
+        , m_storage(std::move(storage))
+    {
+    }
+
+    /**
+     * Constructs an xarray_adaptor of the given stl-like container.
+     * @param storage the container to adapt (copied into the adaptor)
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    inline xarray_adaptor<EC, L, SC, Tag>::xarray_adaptor(const storage_type& storage)
+        : base_type()
+        , m_storage(storage)
+    {
+    }
+
+    /**
+     * Constructs an xarray_adaptor of the given stl-like container,
+     * with the specified shape and layout_type.
+     * @param storage the container to adapt
+     * @param shape the shape of the xarray_adaptor
+     * @param l the layout_type of the xarray_adaptor
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    template <class D>
+    inline xarray_adaptor<EC, L, SC, Tag>::xarray_adaptor(D&& storage, const shape_type& shape, layout_type l)
+        : base_type()
+        , m_storage(std::forward<D>(storage))
+    {
+        // resize computes strides/backstrides for the requested shape and layout.
+        base_type::resize(shape, l);
+    }
+
+    /**
+     * Constructs an xarray_adaptor of the given stl-like container,
+     * with the specified shape and strides.
+     * @param storage the container to adapt
+     * @param shape the shape of the xarray_adaptor
+     * @param strides the strides of the xarray_adaptor
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    template <class D>
+    inline xarray_adaptor<EC, L, SC, Tag>::xarray_adaptor(
+        D&& storage,
+        const shape_type& shape,
+        const strides_type& strides
+    )
+        : base_type()
+        , m_storage(std::forward<D>(storage))
+    {
+        base_type::resize(shape, strides);
+    }
+
+    //@}
+
+    // Copy assignment: copies the inner shape/strides (via the base) and the storage.
+    template <class EC, layout_type L, class SC, class Tag>
+    inline auto xarray_adaptor<EC, L, SC, Tag>::operator=(const xarray_adaptor& rhs) -> self_type&
+    {
+        base_type::operator=(rhs);
+        m_storage = rhs.m_storage;
+        return *this;
+    }
+
+    // Move assignment.
+    // NOTE(review): m_storage is copied rather than moved here — presumably
+    // because the adaptor's storage type may reference an external container
+    // that must not be emptied; confirm against upstream xtensor before changing.
+    template <class EC, layout_type L, class SC, class Tag>
+    inline auto xarray_adaptor<EC, L, SC, Tag>::operator=(xarray_adaptor&& rhs) -> self_type&
+    {
+        base_type::operator=(std::move(rhs));
+        m_storage = rhs.m_storage;
+        return *this;
+    }
+
+    // Assignment from a temporary (the evaluated result type of expressions).
+    // The const_casts allow moving out of accessors that only expose const
+    // references; rhs is an expiring object, so stealing its internals is safe.
+    template <class EC, layout_type L, class SC, class Tag>
+    inline auto xarray_adaptor<EC, L, SC, Tag>::operator=(temporary_type&& rhs) -> self_type&
+    {
+        base_type::shape_impl() = std::move(const_cast<shape_type&>(rhs.shape()));
+        base_type::strides_impl() = std::move(const_cast<strides_type&>(rhs.strides()));
+        base_type::backstrides_impl() = std::move(const_cast<backstrides_type&>(rhs.backstrides()));
+        m_storage = std::move(rhs.storage());
+        return *this;
+    }
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended assignment operator.
+     * Evaluates the expression \p e into the adapted storage via the
+     * container semantic base.
+     */
+    template <class EC, layout_type L, class SC, class Tag>
+    template <class E>
+    inline auto xarray_adaptor<EC, L, SC, Tag>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        return semantic_base::operator=(e);
+    }
+
+    //@}
+
+    // Mutable access to the adapted storage (hook used by the container base class).
+    template <class EC, layout_type L, class SC, class Tag>
+    inline auto xarray_adaptor<EC, L, SC, Tag>::storage_impl() noexcept -> storage_type&
+    {
+        return m_storage;
+    }
+
+    // Const access to the adapted storage (hook used by the container base class).
+    template <class EC, layout_type L, class SC, class Tag>
+    inline auto xarray_adaptor<EC, L, SC, Tag>::storage_impl() const noexcept -> const storage_type&
+    {
+        return m_storage;
+    }
+
+    // Rebinds the adapted buffer to a new pointer/size pair by forwarding to the
+    // storage's reset_data (the `return` of a void expression is intentional).
+    template <class EC, layout_type L, class SC, class Tag>
+    template <class P, class S>
+    inline void xarray_adaptor<EC, L, SC, Tag>::reset_buffer(P&& pointer, S&& size)
+    {
+        return m_storage.reset_data(std::forward<P>(pointer), std::forward<S>(size));
+    }
+}
+
+#endif

+ 1367 - 0
3rd/numpy/include/xtensor/xassign.hpp

@@ -0,0 +1,1367 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_ASSIGN_HPP
+#define XTENSOR_ASSIGN_HPP
+
+#include <algorithm>
+#include <functional>
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xcomplex.hpp>
+#include <xtl/xsequence.hpp>
+
+#include "xexpression.hpp"
+#include "xfunction.hpp"
+#include "xiterator.hpp"
+#include "xstrides.hpp"
+#include "xtensor_config.hpp"
+#include "xtensor_forward.hpp"
+#include "xutils.hpp"
+
+#if defined(XTENSOR_USE_TBB)
+#include <tbb/tbb.h>
+#endif
+
+namespace xt
+{
+
+    /********************
+     * Assign functions *
+     ********************/
+
+    // Free-function entry points of the assignment machinery; each dispatches
+    // on the expression tag through xexpression_assigner (defined below).
+
+    template <class E1, class E2>
+    void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial);
+
+    template <class E1, class E2>
+    void assign_xexpression(xexpression<E1>& e1, const xexpression<E2>& e2);
+
+    template <class E1, class E2>
+    void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2);
+
+    template <class E1, class E2, class F>
+    void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f);
+
+    template <class E1, class E2>
+    void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2);
+
+    // Tag-dispatched overload pair: the std::false_type overload is selected
+    // when strided-loop assignment is disabled, the std::true_type one when enabled.
+    template <class E1, class E2>
+    void strided_assign(E1& e1, const E2& e2, std::false_type /*disable*/);
+
+    template <class E1, class E2>
+    void strided_assign(E1& e1, const E2& e2, std::true_type /*enable*/);
+
+    /************************
+     * xexpression_assigner *
+     ************************/
+
+    // Primary template, specialized per expression tag.
+    template <class Tag>
+    class xexpression_assigner_base;
+
+    // Specialization for the default tensor expression tag: provides the raw
+    // data assignment entry point (strategy selection happens in assign_data).
+    template <>
+    class xexpression_assigner_base<xtensor_expression_tag>
+    {
+    public:
+
+        template <class E1, class E2>
+        static void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial);
+    };
+
+    // High-level assignment API layered on the tag-specific base: handles
+    // resizing, broadcast checks and temporaries before delegating to assign_data.
+    template <class Tag>
+    class xexpression_assigner : public xexpression_assigner_base<Tag>
+    {
+    public:
+
+        using base_type = xexpression_assigner_base<Tag>;
+
+        template <class E1, class E2>
+        static void assign_xexpression(E1& e1, const E2& e2);
+
+        template <class E1, class E2>
+        static void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2);
+
+        template <class E1, class E2, class F>
+        static void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f);
+
+        template <class E1, class E2>
+        static void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2);
+
+    private:
+
+        // Resizes e1 to e2's shape and returns whether the broadcast is trivial.
+        template <class E1, class E2>
+        static bool resize(E1& e1, const E2& e2);
+
+        // Overload for xfunction right-hand sides, whose shape may be known
+        // statically, allowing compile-time triviality detection.
+        template <class E1, class F, class... CT>
+        static bool resize(E1& e1, const xfunction<F, CT...>& e2);
+    };
+
+    /********************
+     * stepper_assigner *
+     ********************/
+
+    // Generic element-wise assigner: walks both expressions with steppers in
+    // layout order L while maintaining a multi-dimensional index. This is the
+    // fallback used when neither linear nor strided assignment applies.
+    template <class E1, class E2, layout_type L>
+    class stepper_assigner
+    {
+    public:
+
+        using lhs_iterator = typename E1::stepper;
+        using rhs_iterator = typename E2::const_stepper;
+        using shape_type = typename E1::shape_type;
+        using index_type = xindex_type_t<shape_type>;
+        using size_type = typename lhs_iterator::size_type;
+        using difference_type = typename lhs_iterator::difference_type;
+
+        stepper_assigner(E1& e1, const E2& e2);
+
+        // Assigns every element of e2 to e1.
+        void run();
+
+        // Stepper interface, forwarded to both sides (driven by stepper_tools).
+        void step(size_type i);
+        void step(size_type i, size_type n);
+        void reset(size_type i);
+
+        void to_end(layout_type);
+
+    private:
+
+        E1& m_e1;
+
+        lhs_iterator m_lhs;
+        rhs_iterator m_rhs;
+
+        index_type m_index;
+    };
+
+    /*******************
+     * linear_assigner *
+     *******************/
+
+    // Assigns expressions that can be traversed linearly (flat index); when
+    // simd_assign is true the loop uses the expressions' SIMD interface.
+    template <bool simd_assign>
+    class linear_assigner
+    {
+    public:
+
+        template <class E1, class E2>
+        static void run(E1& e1, const E2& e2);
+    };
+
+    // Non-SIMD specialization based on plain linear iterators.
+    template <>
+    class linear_assigner<false>
+    {
+    public:
+
+        template <class E1, class E2>
+        static void run(E1& e1, const E2& e2);
+
+    private:
+
+        // Tag-dispatched on convertibility of E2's value_type to E1's; the
+        // std::false_type overload is an intentionally empty stub (see run).
+        template <class E1, class E2>
+        static void run_impl(E1& e1, const E2& e2, std::true_type);
+
+        template <class E1, class E2>
+        static void run_impl(E1& e1, const E2& e2, std::false_type);
+    };
+
+    /*************************
+     * strided_loop_assigner *
+     *************************/
+
+    namespace strided_assign_detail
+    {
+        // Result of the stride analysis performed by get_loop_sizes.
+        struct loop_sizes_t
+        {
+            bool can_do_strided_assign;
+            bool is_row_major;
+            std::size_t inner_loop_size;
+            std::size_t outer_loop_size;
+            std::size_t cut;
+            std::size_t dimension;
+        };
+    }
+
+    // Strided-loop assignment: analyses the strides of both expressions
+    // (get_loop_sizes) and runs the assignment accordingly; `simd` enables
+    // vectorization of the inner loop.
+    template <bool simd>
+    class strided_loop_assigner
+    {
+    public:
+
+        using loop_sizes_t = strided_assign_detail::loop_sizes_t;
+        // is_row_major, inner_loop_size, outer_loop_size, cut
+        template <class E1, class E2>
+        static void run(E1& e1, const E2& e2, const loop_sizes_t& loop_sizes);
+        template <class E1, class E2>
+        static loop_sizes_t get_loop_sizes(E1& e1, const E2& e2);
+        template <class E1, class E2>
+        static void run(E1& e1, const E2& e2);
+    };
+
+    /***********************************
+     * Assign functions implementation *
+     ***********************************/
+
+    // Dispatches data assignment to the tag-specific assigner.
+    template <class E1, class E2>
+    inline void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial)
+    {
+        using tag = xexpression_tag_t<E1, E2>;
+        xexpression_assigner<tag>::assign_data(e1, e2, trivial);
+    }
+
+    // If E2 provides a custom assign_to (has_assign_to), the expression assigns
+    // itself; otherwise fall back to the generic tag-dispatched assignment.
+    template <class E1, class E2>
+    inline void assign_xexpression(xexpression<E1>& e1, const xexpression<E2>& e2)
+    {
+        xtl::mpl::static_if<has_assign_to<E1, E2>::value>(
+            [&](auto self)
+            {
+                self(e2).derived_cast().assign_to(e1);
+            },
+            /*else*/
+            [&](auto /*self*/)
+            {
+                using tag = xexpression_tag_t<E1, E2>;
+                xexpression_assigner<tag>::assign_xexpression(e1, e2);
+            }
+        );
+    }
+
+    // Dispatches computed assignment to the tag-specific assigner.
+    template <class E1, class E2>
+    inline void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2)
+    {
+        using tag = xexpression_tag_t<E1, E2>;
+        xexpression_assigner<tag>::computed_assign(e1, e2);
+    }
+
+    // Dispatches scalar computed assignment with functor f.
+    template <class E1, class E2, class F>
+    inline void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f)
+    {
+        using tag = xexpression_tag_t<E1, E2>;
+        xexpression_assigner<tag>::scalar_computed_assign(e1, e2, std::forward<F>(f));
+    }
+
+    // Dispatches the broadcast-compatibility check.
+    template <class E1, class E2>
+    inline void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2)
+    {
+        using tag = xexpression_tag_t<E1, E2>;
+        xexpression_assigner<tag>::assert_compatible_shape(e1, e2);
+    }
+
+    /***************************************
+     * xexpression_assigner implementation *
+     ***************************************/
+
+    namespace detail
+    {
+        // True when the combined static layouts of E1 and E2 allow a flat
+        // (linear) traversal, i.e. do not compute to layout_type::dynamic.
+        template <class E1, class E2>
+        constexpr bool linear_static_layout()
+        {
+            // A row_major or column_major container with a dimension <= 1 is computed as
+            // layout any, leading to some performance improvements, for example when
+            // assigning a col-major vector to a row-major vector etc
+            return compute_layout(
+                       select_layout<E1::static_layout, typename E1::shape_type>::value,
+                       select_layout<E2::static_layout, typename E2::shape_type>::value
+                   )
+                   != layout_type::dynamic;
+        }
+
+        // Runtime check for linear assignability; only expressions exposing
+        // strides can qualify — the overload below always returns false.
+        template <class E1, class E2>
+        inline auto is_linear_assign(const E1& e1, const E2& e2)
+            -> std::enable_if_t<has_strides<E1>::value, bool>
+        {
+            return (E1::contiguous_layout && E2::contiguous_layout && linear_static_layout<E1, E2>())
+                   || (e1.is_contiguous() && e2.has_linear_assign(e1.strides()));
+        }
+
+        template <class E1, class E2>
+        inline auto is_linear_assign(const E1&, const E2&) -> std::enable_if_t<!has_strides<E1>::value, bool>
+        {
+            return false;
+        }
+
+        // True when both sides are contiguous and their runtime layouts combine
+        // to a non-dynamic layout.
+        template <class E1, class E2>
+        inline bool linear_dynamic_layout(const E1& e1, const E2& e2)
+        {
+            return e1.is_contiguous() && e2.is_contiguous()
+                   && compute_layout(e1.layout(), e2.layout()) != layout_type::dynamic;
+        }
+
+        // Detects whether a stepper type provides step_leading().
+        template <class E, class = void>
+        struct has_step_leading : std::false_type
+        {
+        };
+
+        template <class E>
+        struct has_step_leading<E, void_t<decltype(std::declval<E>().step_leading())>> : std::true_type
+        {
+        };
+
+        // An expression is eligible for the strided loop when it has strides
+        // and its stepper both dereferences to a true reference and can step
+        // along the leading dimension.
+        template <class T>
+        struct use_strided_loop
+        {
+            static constexpr bool stepper_deref()
+            {
+                return std::is_reference<typename T::stepper::reference>::value;
+            }
+
+            static constexpr bool value = has_strides<T>::value
+                                          && has_step_leading<typename T::stepper>::value && stepper_deref();
+        };
+
+        // Scalars are always eligible.
+        template <class T>
+        struct use_strided_loop<xscalar<T>>
+        {
+            static constexpr bool value = true;
+        };
+
+        // An xfunction is eligible iff all of its operands are.
+        template <class F, class... CT>
+        struct use_strided_loop<xfunction<F, CT...>>
+        {
+            static constexpr bool value = xtl::conjunction<use_strided_loop<std::decay_t<CT>>...>::value;
+        };
+
+        /**
+         * Considering the assignment LHS = RHS, if the requested value type used for
+         * loading simd from RHS is not complex while LHS value_type is complex,
+         * the assignment fails. The reason is that SIMD batches of complex values cannot
+         * be implicitly instantiated from batches of scalar values.
+         * Making the constructor implicit does not fix the issue since in the end,
+         * the assignment is done with vec.store(buffer) where vec is a batch of scalars
+         * and buffer an array of complex. SIMD batches of scalars do not provide overloads
+         * of store that accept buffer of complex values and that SHOULD NOT CHANGE.
+         * Load and store overloads must accept SCALAR BUFFERS ONLY.
+         * Therefore, the solution is to explicitly force the instantiation of complex
+         * batches in the assignment mechanism. A common situation that triggers this
+         * issue is:
+         * xt::xarray<double> rhs = { 1, 2, 3 };
+         * xt::xarray<std::complex<double>> lhs = rhs;
+         */
+        template <class T1, class T2>
+        struct conditional_promote_to_complex
+        {
+            static constexpr bool cond = xtl::is_gen_complex<T1>::value && !xtl::is_gen_complex<T2>::value;
+            // Alternative: use std::complex<T2> or xcomplex<T2, T2, bool> depending on T1
+            using type = std::conditional_t<cond, T1, T2>;
+        };
+
+        template <class T1, class T2>
+        using conditional_promote_to_complex_t = typename conditional_promote_to_complex<T1, T2>::type;
+    }
+
+    // Compile-time (and partly runtime) traits driving the choice of assignment
+    // strategy between lhs expression E1 and rhs expression E2 (see assign_data).
+    template <class E1, class E2>
+    class xassign_traits
+    {
+    private:
+
+        using e1_value_type = typename E1::value_type;
+        using e2_value_type = typename E2::value_type;
+
+        template <class T>
+        using is_bool = std::is_same<T, bool>;
+
+        // bool -> non-bool assignments are excluded from the convertible fast
+        // path; bools are loaded through bool_load_type instead (see below).
+        static constexpr bool is_bool_conversion()
+        {
+            return is_bool<e2_value_type>::value && !is_bool<e1_value_type>::value;
+        }
+
+        static constexpr bool contiguous_layout()
+        {
+            return E1::contiguous_layout && E2::contiguous_layout;
+        }
+
+        static constexpr bool convertible_types()
+        {
+            return std::is_convertible<e2_value_type, e1_value_type>::value && !is_bool_conversion();
+        }
+
+        // Probe for an actual SIMD backend: int8_t batches wider than one lane.
+        static constexpr bool use_xsimd()
+        {
+            return xt_simd::simd_traits<int8_t>::size > 1;
+        }
+
+        template <class T>
+        static constexpr bool simd_size_impl()
+        {
+            return xt_simd::simd_traits<T>::size > 1 || (is_bool<T>::value && use_xsimd());
+        }
+
+        static constexpr bool simd_size()
+        {
+            return simd_size_impl<e1_value_type>() && simd_size_impl<e2_value_type>();
+        }
+
+        static constexpr bool simd_interface()
+        {
+            return has_simd_interface<E1, requested_value_type>()
+                   && has_simd_interface<E2, requested_value_type>();
+        }
+
+    public:
+
+        // constexpr methods instead of constexpr data members avoid the need of definitions at namespace
+        // scope of these data members (since they are odr-used).
+
+        static constexpr bool simd_assign()
+        {
+            return convertible_types() && simd_size() && simd_interface();
+        }
+
+        // Runtime: linear assignment requires a trivial broadcast plus matching
+        // strides/contiguity (see detail::is_linear_assign).
+        static constexpr bool linear_assign(const E1& e1, const E2& e2, bool trivial)
+        {
+            return trivial && detail::is_linear_assign(e1, e2);
+        }
+
+        static constexpr bool strided_assign()
+        {
+            return detail::use_strided_loop<E1>::value && detail::use_strided_loop<E2>::value;
+        }
+
+        static constexpr bool simd_linear_assign()
+        {
+            return contiguous_layout() && simd_assign();
+        }
+
+        static constexpr bool simd_strided_assign()
+        {
+            return strided_assign() && simd_assign();
+        }
+
+        // Runtime variant for when contiguity/layout is only known at run time.
+        static constexpr bool simd_linear_assign(const E1& e1, const E2& e2)
+        {
+            return simd_assign() && detail::linear_dynamic_layout(e1, e2);
+        }
+
+        // Value type requested when loading SIMD batches from E2: bools go
+        // through the expression's bool_load_type, and scalar rhs types are
+        // promoted to complex when the lhs value type is complex (see the
+        // comment above conditional_promote_to_complex).
+        using e2_requested_value_type = std::
+            conditional_t<is_bool<e2_value_type>::value, typename E2::bool_load_type, e2_value_type>;
+        using requested_value_type = detail::conditional_promote_to_complex_t<e1_value_type, e2_requested_value_type>;
+    };
+
+    // Core strategy selection: picks, in order of preference,
+    //  - (optionally SIMD) linear assignment when the traversal is flat,
+    //  - SIMD strided-loop assignment when both sides support it,
+    //  - the generic stepper fallback otherwise.
+    template <class E1, class E2>
+    inline void xexpression_assigner_base<xtensor_expression_tag>::assign_data(
+        xexpression<E1>& e1,
+        const xexpression<E2>& e2,
+        bool trivial
+    )
+    {
+        E1& de1 = e1.derived_cast();
+        const E2& de2 = e2.derived_cast();
+        using traits = xassign_traits<E1, E2>;
+
+        bool linear_assign = traits::linear_assign(de1, de2, trivial);
+        constexpr bool simd_assign = traits::simd_assign();
+        constexpr bool simd_linear_assign = traits::simd_linear_assign();
+        constexpr bool simd_strided_assign = traits::simd_strided_assign();
+        if (linear_assign)
+        {
+            if (simd_linear_assign || traits::simd_linear_assign(de1, de2))
+            {
+                // Do not use linear_assigner<true> here since it will make the compiler
+                // instantiate this branch even if the runtime condition is false, resulting
+                // in compilation error for expressions that do not provide a SIMD interface.
+                // simd_assign is true if simd_linear_assign() or simd_linear_assign(de1, de2)
+                // is true.
+                linear_assigner<simd_assign>::run(de1, de2);
+            }
+            else
+            {
+                linear_assigner<false>::run(de1, de2);
+            }
+        }
+        else if (simd_strided_assign)
+        {
+            strided_loop_assigner<simd_strided_assign>::run(de1, de2);
+        }
+        else
+        {
+            stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(de1, de2).run();
+        }
+    }
+
+    // Resizes the lhs to the rhs shape, then assigns the data; the broadcast
+    // triviality is computed as a by-product of resize.
+    template <class Tag>
+    template <class E1, class E2>
+    inline void xexpression_assigner<Tag>::assign_xexpression(E1& e1, const E2& e2)
+    {
+        bool trivial_broadcast = resize(e1.derived_cast(), e2.derived_cast());
+        base_type::assign_data(e1, e2, trivial_broadcast);
+    }
+
+    // Computed assignment: broadcasts the rhs shape and, when it exceeds the
+    // lhs in rank or (lexicographically, element-wise greater) in extent,
+    // evaluates into a temporary first and moves it in; otherwise assigns
+    // directly into the existing lhs.
+    template <class Tag>
+    template <class E1, class E2>
+    inline void xexpression_assigner<Tag>::computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2)
+    {
+        using shape_type = typename E1::shape_type;
+        using comperator_type = std::greater<typename shape_type::value_type>;
+
+        using size_type = typename E1::size_type;
+
+        E1& de1 = e1.derived_cast();
+        const E2& de2 = e2.derived_cast();
+
+        size_type dim2 = de2.dimension();
+        shape_type shape = uninitialized_shape<shape_type>(dim2);
+
+        bool trivial_broadcast = de2.broadcast_shape(shape, true);
+
+        auto&& de1_shape = de1.shape();
+        if (dim2 > de1.dimension()
+            || std::lexicographical_compare(
+                shape.begin(),
+                shape.end(),
+                de1_shape.begin(),
+                de1_shape.end(),
+                comperator_type()
+            ))
+        {
+            typename E1::temporary_type tmp(shape);
+            base_type::assign_data(tmp, e2, trivial_broadcast);
+            de1.assign_temporary(std::move(tmp));
+        }
+        else
+        {
+            base_type::assign_data(e1, e2, trivial_broadcast);
+        }
+    }
+
+    // Applies f(element, e2) to every element of e1's storage, in place and in
+    // linear storage order.
+    template <class Tag>
+    template <class E1, class E2, class F>
+    inline void xexpression_assigner<Tag>::scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f)
+    {
+        E1& d = e1.derived_cast();
+        using size_type = typename E1::size_type;
+        auto dst = d.storage().begin();
+        for (size_type i = d.size(); i > 0; --i)
+        {
+            *dst = f(*dst, e2);
+            ++dst;
+        }
+    }
+
+    // Throws a broadcast error when e2's shape cannot be broadcast to e1's.
+    template <class Tag>
+    template <class E1, class E2>
+    inline void
+    xexpression_assigner<Tag>::assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2)
+    {
+        const E1& de1 = e1.derived_cast();
+        const E2& de2 = e2.derived_cast();
+        if (!broadcastable(de2.shape(), de1.shape()))
+        {
+            throw_broadcast_error(de2.shape(), de1.shape());
+        }
+    }
+
+    namespace detail
+    {
+        // Compile-time broadcast triviality for an xfunction operand pack; the
+        // value is only computed (via promote_index) when B is true, i.e. when
+        // all operand shapes are statically known.
+        template <bool B, class... CT>
+        struct static_trivial_broadcast;
+
+        template <class... CT>
+        struct static_trivial_broadcast<true, CT...>
+        {
+            static constexpr bool value = detail::promote_index<typename std::decay_t<CT>::shape_type...>::value;
+        };
+
+        template <class... CT>
+        struct static_trivial_broadcast<false, CT...>
+        {
+            static constexpr bool value = false;
+        };
+    }
+
+    // Generic overload: resize to the rhs shape and report the broadcast as trivial.
+    template <class Tag>
+    template <class E1, class E2>
+    inline bool xexpression_assigner<Tag>::resize(E1& e1, const E2& e2)
+    {
+        // If our RHS is not a xfunction, we know that the RHS is at least potentially trivial
+        // We check the strides of the RHS in detail::is_trivial_broadcast to see if they match up!
+        // So we can skip a shape copy and a call to broadcast_shape(...)
+        e1.resize(e2.shape());
+        return true;
+    }
+
+    // xfunction overload: with a statically known shape, both the resize and
+    // the broadcast-triviality computation happen at compile time; otherwise
+    // the shape is broadcast at run time before resizing.
+    template <class Tag>
+    template <class E1, class F, class... CT>
+    inline bool xexpression_assigner<Tag>::resize(E1& e1, const xfunction<F, CT...>& e2)
+    {
+        return xtl::mpl::static_if<detail::is_fixed<typename xfunction<F, CT...>::shape_type>::value>(
+            [&](auto /*self*/)
+            {
+                /*
+                 * If the shape of the xfunction is statically known, we can compute the broadcast triviality
+                 * at compile time plus we can resize right away.
+                 */
+                // resize in case LHS is not a fixed size container. If it is, this is a NOP
+                e1.resize(typename xfunction<F, CT...>::shape_type{});
+                return detail::static_trivial_broadcast<
+                    detail::is_fixed<typename xfunction<F, CT...>::shape_type>::value,
+                    CT...>::value;
+            },
+            /* else */
+            [&](auto /*self*/)
+            {
+                using index_type = xindex_type_t<typename E1::shape_type>;
+                using size_type = typename E1::size_type;
+                size_type size = e2.dimension();
+                index_type shape = uninitialized_shape<index_type>(size);
+                bool trivial_broadcast = e2.broadcast_shape(shape, true);
+                e1.resize(std::move(shape));
+                return trivial_broadcast;
+            }
+        );
+    }
+
+    /***********************************
+     * stepper_assigner implementation *
+     ***********************************/
+
+    // True when converting FROM to TO may lose information: TO is a smaller
+    // arithmetic type, or an integral target fed from a floating-point source.
+    template <class FROM, class TO>
+    struct is_narrowing_conversion
+    {
+        using argument_type = std::decay_t<FROM>;
+        using result_type = std::decay_t<TO>;
+
+        static const bool value = xtl::is_arithmetic<result_type>::value
+                                  && (sizeof(result_type) < sizeof(argument_type)
+                                      || (xtl::is_integral<result_type>::value
+                                          && std::is_floating_point<argument_type>::value));
+    };
+
+    // True when FROM and TO differ in signedness.
+    template <class FROM, class TO>
+    struct has_sign_conversion
+    {
+        using argument_type = std::decay_t<FROM>;
+        using result_type = std::decay_t<TO>;
+
+        static const bool value = xtl::is_signed<argument_type>::value != xtl::is_signed<result_type>::value;
+    };
+
+    // True when assigning FROM to TO warrants an explicit cast (narrowing or
+    // sign change); drives conditional_cast in the assigners below.
+    template <class FROM, class TO>
+    struct has_assign_conversion
+    {
+        using argument_type = std::decay_t<FROM>;
+        using result_type = std::decay_t<TO>;
+
+        static const bool value = is_narrowing_conversion<argument_type, result_type>::value
+                                  || has_sign_conversion<argument_type, result_type>::value;
+    };
+
+    // Both steppers are constructed over e1's shape, so the rhs expression is
+    // broadcast to the shape of the destination.
+    template <class E1, class E2, layout_type L>
+    inline stepper_assigner<E1, E2, L>::stepper_assigner(E1& e1, const E2& e2)
+        : m_e1(e1)
+        , m_lhs(e1.stepper_begin(e1.shape()))
+        , m_rhs(e2.stepper_begin(e1.shape()))
+        , m_index(xtl::make_sequence<index_type>(e1.shape().size(), size_type(0)))
+    {
+    }
+
+    // Copies every element in layout order L, casting explicitly when the value
+    // conversion is narrowing or changes signedness.
+    template <class E1, class E2, layout_type L>
+    inline void stepper_assigner<E1, E2, L>::run()
+    {
+        using tmp_size_type = typename E1::size_type;
+        using argument_type = std::decay_t<decltype(*m_rhs)>;
+        using result_type = std::decay_t<decltype(*m_lhs)>;
+        constexpr bool needs_cast = has_assign_conversion<argument_type, result_type>::value;
+
+        tmp_size_type s = m_e1.size();
+        for (tmp_size_type i = 0; i < s; ++i)
+        {
+            *m_lhs = conditional_cast<needs_cast, result_type>(*m_rhs);
+            stepper_tools<L>::increment_stepper(*this, m_index, m_e1.shape());
+        }
+    }
+
+    // The members below forward stepping to both sides so that stepper_tools
+    // can advance lhs and rhs in lockstep.
+    template <class E1, class E2, layout_type L>
+    inline void stepper_assigner<E1, E2, L>::step(size_type i)
+    {
+        m_lhs.step(i);
+        m_rhs.step(i);
+    }
+
+    template <class E1, class E2, layout_type L>
+    inline void stepper_assigner<E1, E2, L>::step(size_type i, size_type n)
+    {
+        m_lhs.step(i, n);
+        m_rhs.step(i, n);
+    }
+
+    template <class E1, class E2, layout_type L>
+    inline void stepper_assigner<E1, E2, L>::reset(size_type i)
+    {
+        m_lhs.reset(i);
+        m_rhs.reset(i);
+    }
+
+    template <class E1, class E2, layout_type L>
+    inline void stepper_assigner<E1, E2, L>::to_end(layout_type l)
+    {
+        m_lhs.to_end(l);
+        m_rhs.to_end(l);
+    }
+
+    /**********************************
+     * linear_assigner implementation *
+     **********************************/
+
+    // SIMD linear assignment: a scalar prologue runs up to the first aligned
+    // index, the aligned middle section is processed in SIMD batches
+    // (parallelized with TBB or OpenMP when those are enabled at build time),
+    // and a scalar epilogue handles the remaining tail elements.
+    template <bool simd_assign>
+    template <class E1, class E2>
+    inline void linear_assigner<simd_assign>::run(E1& e1, const E2& e2)
+    {
+        using lhs_align_mode = xt_simd::container_alignment_t<E1>;
+        constexpr bool is_aligned = std::is_same<lhs_align_mode, aligned_mode>::value;
+        using rhs_align_mode = std::conditional_t<is_aligned, inner_aligned_mode, unaligned_mode>;
+        using e1_value_type = typename E1::value_type;
+        using e2_value_type = typename E2::value_type;
+        using value_type = typename xassign_traits<E1, E2>::requested_value_type;
+        using simd_type = xt_simd::simd_type<value_type>;
+        using size_type = typename E1::size_type;
+        size_type size = e1.size();
+        constexpr size_type simd_size = simd_type::size;
+        // NOTE(review): the argument order here is <lhs, rhs>, while
+        // stepper_assigner::run uses <rhs, lhs>; verify against upstream xtensor.
+        constexpr bool needs_cast = has_assign_conversion<e1_value_type, e2_value_type>::value;
+
+        size_type align_begin = is_aligned ? 0 : xt_simd::get_alignment_offset(e1.data(), size, simd_size);
+        // Rounds the vectorized range down to a whole number of SIMD batches
+        // (assumes simd_size is a power of two).
+        size_type align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));
+
+        for (size_type i = 0; i < align_begin; ++i)
+        {
+            e1.data_element(i) = conditional_cast<needs_cast, e1_value_type>(e2.data_element(i));
+        }
+
+#if defined(XTENSOR_USE_TBB)
+        if (size >= XTENSOR_TBB_THRESHOLD)
+        {
+            tbb::static_partitioner ap;
+            tbb::parallel_for(
+                align_begin,
+                align_end,
+                simd_size,
+                [&e1, &e2](size_t i)
+                {
+                    e1.template store_simd<lhs_align_mode>(
+                        i,
+                        e2.template load_simd<rhs_align_mode, value_type>(i)
+                    );
+                },
+                ap
+            );
+        }
+        else
+        {
+            for (size_type i = align_begin; i < align_end; i += simd_size)
+            {
+                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
+            }
+        }
+#elif defined(XTENSOR_USE_OPENMP)
+        if (size >= size_type(XTENSOR_OPENMP_TRESHOLD))
+        {
+#pragma omp parallel for default(none) shared(align_begin, align_end, e1, e2)
+#ifndef _WIN32
+            for (size_type i = align_begin; i < align_end; i += simd_size)
+            {
+                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
+            }
+#else
+            // Signed loop variable on Windows — presumably because MSVC's OpenMP
+            // requires a signed loop index; confirm before changing.
+            for (auto i = static_cast<std::ptrdiff_t>(align_begin); i < static_cast<std::ptrdiff_t>(align_end);
+                 i += static_cast<std::ptrdiff_t>(simd_size))
+            {
+                size_type ui = static_cast<size_type>(i);
+                e1.template store_simd<lhs_align_mode>(ui, e2.template load_simd<rhs_align_mode, value_type>(ui));
+            }
+#endif
+        }
+        else
+        {
+            for (size_type i = align_begin; i < align_end; i += simd_size)
+            {
+                e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
+            }
+        }
+#else
+        for (size_type i = align_begin; i < align_end; i += simd_size)
+        {
+            e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
+        }
+#endif
+        for (size_type i = align_end; i < size; ++i)
+        {
+            e1.data_element(i) = conditional_cast<needs_cast, e1_value_type>(e2.data_element(i));
+        }
+    }
+
+    // Entry point of the non-SIMD linear assigner: dispatches on whether the
+    // source value_type is convertible to the destination value_type.
+    template <class E1, class E2>
+    inline void linear_assigner<false>::run(E1& e1, const E2& e2)
+    {
+        using is_convertible = std::
+            is_convertible<typename std::decay_t<E2>::value_type, typename std::decay_t<E1>::value_type>;
+        // If the types are not compatible, this function is still instantiated but never called.
+        // To avoid compilation problems in effectively unused code trivial_assigner_run_impl is
+        // empty in this case.
+        run_impl(e1, e2, is_convertible());
+    }
+
+    // Scalar (non-SIMD) element-wise copy through the linear iterators of both
+    // expressions, with an explicit cast to the destination value_type.
+    // Parallelized with TBB or OpenMP when those backends are enabled.
+    template <class E1, class E2>
+    inline void linear_assigner<false>::run_impl(E1& e1, const E2& e2, std::true_type /*is_convertible*/)
+    {
+        using value_type = typename E1::value_type;
+        using size_type = typename E1::size_type;
+        auto src = linear_begin(e2);
+        auto dst = linear_begin(e1);
+        size_type n = e1.size();
+#if defined(XTENSOR_USE_TBB)
+        // NOTE(review): unlike the OpenMP branch below, the TBB branch applies no
+        // size threshold before parallelizing -- confirm this is intentional.
+        tbb::static_partitioner sp;
+        tbb::parallel_for(
+            std::ptrdiff_t(0),
+            static_cast<std::ptrdiff_t>(n),
+            [&](std::ptrdiff_t i)
+            {
+                *(dst + i) = static_cast<value_type>(*(src + i));
+            },
+            sp
+        );
+#elif defined(XTENSOR_USE_OPENMP)
+        if (n >= XTENSOR_OPENMP_TRESHOLD)
+        {
+#pragma omp parallel for default(none) shared(src, dst, n)
+            for (std::ptrdiff_t i = std::ptrdiff_t(0); i < static_cast<std::ptrdiff_t>(n); i++)
+            {
+                *(dst + i) = static_cast<value_type>(*(src + i));
+            }
+        }
+        else
+        {
+            // Below threshold: plain sequential copy.
+            for (; n > size_type(0); --n)
+            {
+                *dst = static_cast<value_type>(*src);
+                ++src;
+                ++dst;
+            }
+        }
+#else
+        // Sequential fallback when no parallel backend is enabled.
+        for (; n > size_type(0); --n)
+        {
+            *dst = static_cast<value_type>(*src);
+            ++src;
+            ++dst;
+        }
+#endif
+    }
+
+    // Dead branch instantiated only for incompatible value types; calling it at
+    // runtime is a programming error, hence the unconditional precondition failure.
+    template <class E1, class E2>
+    inline void linear_assigner<false>::run_impl(E1&, const E2&, std::false_type /*is_convertible*/)
+    {
+        XTENSOR_PRECONDITION(false, "Internal error: linear_assigner called with unrelated types.");
+    }
+
+    /****************************************
+     * strided_loop_assigner implementation *
+     ****************************************/
+
+    namespace strided_assign_detail
+    {
+        template <layout_type layout>
+        struct idx_tools;
+
+        // Index arithmetic helpers for a row-major outer loop: the last axis of
+        // the outer index varies fastest.
+        template <>
+        struct idx_tools<layout_type::row_major>
+        {
+            // Advance outer_index to the next position in row-major order,
+            // carrying (odometer-style) from the last dimension towards the first.
+            template <class T>
+            static void next_idx(T& outer_index, T& outer_shape)
+            {
+                auto i = outer_index.size();
+                for (; i > 0; --i)
+                {
+                    if (outer_index[i - 1] + 1 >= outer_shape[i - 1])
+                    {
+                        outer_index[i - 1] = 0;
+                    }
+                    else
+                    {
+                        outer_index[i - 1]++;
+                        break;
+                    }
+                }
+            }
+
+            // Decompose the flat position n into the multi-index outer_index
+            // relative to outer_shape, assuming row-major ordering.
+            template <class T>
+            static void nth_idx(size_t n, T& outer_index, const T& outer_shape)
+            {
+                dynamic_shape<std::size_t> stride_sizes;
+                xt::resize_container(stride_sizes, outer_shape.size());
+                // compute strides
+                using size_type = typename T::size_type;
+                for (size_type i = outer_shape.size(); i > 0; i--)
+                {
+                    stride_sizes[i - 1] = (i == outer_shape.size()) ? 1 : stride_sizes[i] * outer_shape[i];
+                }
+
+                // compute index
+                for (size_type i = 0; i < outer_shape.size(); i++)
+                {
+                    auto d_idx = n / stride_sizes[i];
+                    outer_index[i] = d_idx;
+                    n -= d_idx * stride_sizes[i];
+                }
+            }
+        };
+
+        // Index arithmetic helpers for a column-major outer loop: the first axis
+        // of the outer index varies fastest.
+        template <>
+        struct idx_tools<layout_type::column_major>
+        {
+            // Advance outer_index to the next position in column-major order,
+            // carrying from the first dimension towards the last.
+            template <class T>
+            static void next_idx(T& outer_index, T& outer_shape)
+            {
+                using size_type = typename T::size_type;
+                size_type i = 0;
+                auto sz = outer_index.size();
+                for (; i < sz; ++i)
+                {
+                    if (outer_index[i] + 1 >= outer_shape[i])
+                    {
+                        outer_index[i] = 0;
+                    }
+                    else
+                    {
+                        outer_index[i]++;
+                        break;
+                    }
+                }
+            }
+
+            // Decompose the flat position n into the multi-index outer_index
+            // relative to outer_shape, assuming column-major ordering.
+            template <class T>
+            static void nth_idx(size_t n, T& outer_index, const T& outer_shape)
+            {
+                dynamic_shape<std::size_t> stride_sizes;
+                xt::resize_container(stride_sizes, outer_shape.size());
+
+                using size_type = typename T::size_type;
+
+                // compute required strides
+                for (size_type i = 0; i < outer_shape.size(); i++)
+                {
+                    stride_sizes[i] = (i == 0) ? 1 : stride_sizes[i - 1] * outer_shape[i - 1];
+                }
+
+                // compute index
+                for (size_type i = outer_shape.size(); i > 0;)
+                {
+                    i--;
+                    auto d_idx = n / stride_sizes[i];
+                    outer_index[i] = d_idx;
+                    n -= d_idx * stride_sizes[i];
+                }
+            }
+        };
+
+        // Visitor over an expression tree that computes the "cut" dimension:
+        // the boundary separating dimensions whose strides differ between the
+        // destination (m_strides) and each operand, from dimensions that agree
+        // and can therefore share one strided inner loop.
+        template <layout_type L, class S>
+        struct check_strides_functor
+        {
+            using strides_type = S;
+
+            // Start with the most permissive cut for the chosen layout:
+            // 0 for row-major (nothing cut), strides.size() for column-major.
+            check_strides_functor(const S& strides)
+                : m_cut(L == layout_type::row_major ? 0 : strides.size())
+                , m_strides(strides)
+            {
+            }
+
+            template <class T, layout_type LE = L>
+            std::enable_if_t<LE == layout_type::row_major, std::size_t> operator()(const T& el)
+            {
+                // All dimensions less than var have differing strides
+                auto var = check_strides_overlap<layout_type::row_major>::get(m_strides, el.strides());
+                if (var > m_cut)
+                {
+                    m_cut = var;
+                }
+                return m_cut;
+            }
+
+            template <class T, layout_type LE = L>
+            std::enable_if_t<LE == layout_type::column_major, std::size_t> operator()(const T& el)
+            {
+                auto var = check_strides_overlap<layout_type::column_major>::get(m_strides, el.strides());
+                // All dimensions >= var have differing strides
+                if (var < m_cut)
+                {
+                    m_cut = var;
+                }
+                return m_cut;
+            }
+
+            // Scalars broadcast over any shape and never constrain the cut.
+            template <class T>
+            std::size_t operator()(const xt::xscalar<T>& /*el*/)
+            {
+                return m_cut;
+            }
+
+            // Functions: recurse into every argument and accumulate into m_cut.
+            template <class F, class... CT>
+            std::size_t operator()(const xt::xfunction<F, CT...>& xf)
+            {
+                xt::for_each(*this, xf.arguments());
+                return m_cut;
+            }
+
+        private:
+
+            std::size_t m_cut;
+            const strides_type& m_strides;
+        };
+
+        // Fallback when the destination has no strides (or strided assign is
+        // disabled): report "no strided loop possible" so the caller falls back
+        // to the stepper assigner.
+        template <bool possible = true, class E1, class E2, std::enable_if_t<!has_strides<E1>::value || !possible, bool> = true>
+        loop_sizes_t get_loop_sizes(const E1& e1, const E2&)
+        {
+            return {false, true, 1, e1.size(), e1.dimension(), e1.dimension()};
+        }
+
+        // Analyze destination strides and operand strides to decide whether a
+        // strided (outer loop over `cut` dims / contiguous inner loop) assignment
+        // is possible, and compute the resulting loop sizes.
+        template <bool possible = true, class E1, class E2, std::enable_if_t<has_strides<E1>::value && possible, bool> = true>
+        loop_sizes_t get_loop_sizes(const E1& e1, const E2& e2)
+        {
+            using shape_value_type = typename E1::shape_type::value_type;
+            bool is_row_major = true;
+
+            // Try to find a row-major scheme first, where the outer loop is on the first N = `cut`
+            // dimensions, and the inner loop runs over the remaining, contiguous trailing dimensions.
+            is_row_major = true;
+            auto is_zero = [](auto i)
+            {
+                return i == 0;
+            };
+            // A stride of 1 on the innermost non-broadcast axis (zero strides are
+            // broadcast axes and are skipped) means that end is contiguous.
+            auto&& strides = e1.strides();
+            auto it_bwd = std::find_if_not(strides.rbegin(), strides.rend(), is_zero);
+            bool de1_row_contiguous = it_bwd != strides.rend() && *it_bwd == 1;
+            auto it_fwd = std::find_if_not(strides.begin(), strides.end(), is_zero);
+            bool de1_col_contiguous = it_fwd != strides.end() && *it_fwd == 1;
+            if (de1_row_contiguous)
+            {
+                is_row_major = true;
+            }
+            else if (de1_col_contiguous)
+            {
+                is_row_major = false;
+            }
+            else
+            {
+                // No strided loop possible.
+                return {false, true, 1, e1.size(), e1.dimension(), e1.dimension()};
+            }
+
+            // Cut is the number of dimensions in the outer loop
+            std::size_t cut = 0;
+
+            if (is_row_major)
+            {
+                auto csf = check_strides_functor<layout_type::row_major, decltype(e1.strides())>(e1.strides());
+                cut = csf(e2);
+                // This makes that only one dimension will be treated in the inner loop.
+                if (cut < e1.strides().size() - 1)
+                {
+                    // Only make the inner loop go over one dimension by default for now
+                    cut = e1.strides().size() - 1;
+                }
+            }
+            else if (!is_row_major)
+            {
+                auto csf = check_strides_functor<layout_type::column_major, decltype(e1.strides())>(e1.strides()
+                );
+                cut = csf(e2);
+                if (cut > 1)
+                {
+                    // Only make the inner loop go over one dimension by default for now
+                    cut = 1;
+                }
+            }  // can't reach here because this would have already triggered the fallback
+
+            // Product of the first `cut` extents = outer iterations; product of the
+            // remaining extents = inner (contiguous) iterations.
+            std::size_t outer_loop_size = static_cast<std::size_t>(std::accumulate(
+                e1.shape().begin(),
+                e1.shape().begin() + static_cast<std::ptrdiff_t>(cut),
+                shape_value_type(1),
+                std::multiplies<shape_value_type>{}
+            ));
+            std::size_t inner_loop_size = static_cast<std::size_t>(std::accumulate(
+                e1.shape().begin() + static_cast<std::ptrdiff_t>(cut),
+                e1.shape().end(),
+                shape_value_type(1),
+                std::multiplies<shape_value_type>{}
+            ));
+
+            // For column-major the roles of the two products are reversed.
+            if (!is_row_major)
+            {
+                std::swap(outer_loop_size, inner_loop_size);
+            }
+
+            return {inner_loop_size > 1, is_row_major, inner_loop_size, outer_loop_size, cut, e1.dimension()};
+        }
+    }
+
+    // Thin forwarding wrapper; the `simd` flag selects between the real analysis
+    // and the "not possible" fallback overload in strided_assign_detail.
+    template <bool simd>
+    template <class E1, class E2>
+    inline strided_assign_detail::loop_sizes_t strided_loop_assigner<simd>::get_loop_sizes(E1& e1, const E2& e2)
+    {
+        return strided_assign_detail::get_loop_sizes<simd>(e1, e2);
+    }
+
+#define strided_parallel_assign
+
+    // Strided assignment driver: an outer loop over the `cut` dimensions steps
+    // both expressions' steppers to the start of each 1D-contiguous slab, and an
+    // inner loop copies that slab with SIMD loads/stores plus a scalar remainder.
+    template <bool simd>
+    template <class E1, class E2>
+    inline void strided_loop_assigner<simd>::run(E1& e1, const E2& e2, const loop_sizes_t& loop_sizes)
+    {
+        bool is_row_major = loop_sizes.is_row_major;
+        std::size_t inner_loop_size = loop_sizes.inner_loop_size;
+        std::size_t outer_loop_size = loop_sizes.outer_loop_size;
+        std::size_t cut = loop_sizes.cut;
+
+
+        // TODO can we get rid of this and use `shape_type`?
+        dynamic_shape<std::size_t> idx, max_shape;
+
+        if (is_row_major)
+        {
+            xt::resize_container(idx, cut);
+            max_shape.assign(e1.shape().begin(), e1.shape().begin() + static_cast<std::ptrdiff_t>(cut));
+        }
+        else
+        {
+            xt::resize_container(idx, e1.shape().size() - cut);
+            max_shape.assign(e1.shape().begin() + static_cast<std::ptrdiff_t>(cut), e1.shape().end());
+        }
+
+        // add this when we have std::array index!
+        // std::fill(idx.begin(), idx.end(), 0);
+        using e1_value_type = typename E1::value_type;
+        using e2_value_type = typename E2::value_type;
+        constexpr bool needs_cast = has_assign_conversion<e1_value_type, e2_value_type>::value;
+        using value_type = typename xassign_traits<E1, E2>::requested_value_type;
+        // bool destinations need the dedicated SIMD bool batch type.
+        using simd_type = std::conditional_t<
+            std::is_same<e1_value_type, bool>::value,
+            xt_simd::simd_bool_type<value_type>,
+            xt_simd::simd_type<value_type>>;
+
+        // Number of full SIMD batches per inner slab, and the scalar remainder.
+        std::size_t simd_size = inner_loop_size / simd_type::size;
+        std::size_t simd_rest = inner_loop_size % simd_type::size;
+
+        auto fct_stepper = e2.stepper_begin(e1.shape());
+        auto res_stepper = e1.stepper_begin(e1.shape());
+
+        // TODO in 1D case this is ambiguous -- could be RM or CM.
+        //      Use default layout to make decision
+        std::size_t step_dim = 0;
+        if (!is_row_major)  // column major case
+        {
+            step_dim = cut;
+        }
+#if defined(XTENSOR_USE_OPENMP) && defined(strided_parallel_assign)
+        if (outer_loop_size >= XTENSOR_OPENMP_TRESHOLD / inner_loop_size)
+        {
+            // NOTE(review): first_step is declared std::size_t but used as a boolean
+            // flag -- consider bool. Each OpenMP worker uses it to seek its steppers
+            // to its first chunk index on the first iteration it executes.
+            std::size_t first_step = true;
+#pragma omp parallel for schedule(static) firstprivate(first_step, fct_stepper, res_stepper, idx)
+            for (std::size_t ox = 0; ox < outer_loop_size; ++ox)
+            {
+                if (first_step)
+                {
+                    is_row_major
+                        ? strided_assign_detail::idx_tools<layout_type::row_major>::nth_idx(ox, idx, max_shape)
+                        : strided_assign_detail::idx_tools<layout_type::column_major>::nth_idx(ox, idx, max_shape);
+
+                    for (std::size_t i = 0; i < idx.size(); ++i)
+                    {
+                        fct_stepper.step(i + step_dim, idx[i]);
+                        res_stepper.step(i + step_dim, idx[i]);
+                    }
+                    first_step = false;
+                }
+
+                for (std::size_t i = 0; i < simd_size; ++i)
+                {
+                    res_stepper.template store_simd(fct_stepper.template step_simd<value_type>());
+                }
+                for (std::size_t i = 0; i < simd_rest; ++i)
+                {
+                    *(res_stepper) = conditional_cast<needs_cast, e1_value_type>(*(fct_stepper));
+                    res_stepper.step_leading();
+                    fct_stepper.step_leading();
+                }
+
+                // next unaligned index
+                is_row_major
+                    ? strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape)
+                    : strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);
+
+                fct_stepper.to_begin();
+
+                // need to step E1 as well if not contiguous assign (e.g. view)
+                if (!E1::contiguous_layout)
+                {
+                    res_stepper.to_begin();
+                    for (std::size_t i = 0; i < idx.size(); ++i)
+                    {
+                        fct_stepper.step(i + step_dim, idx[i]);
+                        res_stepper.step(i + step_dim, idx[i]);
+                    }
+                }
+                else
+                {
+                    for (std::size_t i = 0; i < idx.size(); ++i)
+                    {
+                        fct_stepper.step(i + step_dim, idx[i]);
+                    }
+                }
+            }
+        }
+        else
+        {
+#elif defined(strided_parallel_assign) && defined(XTENSOR_USE_TBB)
+        if (outer_loop_size > XTENSOR_TBB_THRESHOLD / inner_loop_size)
+        {
+            tbb::static_partitioner sp;
+            tbb::parallel_for(
+                tbb::blocked_range<size_t>(0ul, outer_loop_size),
+                [&e1, &e2, is_row_major, step_dim, simd_size, simd_rest, &max_shape, &idx_ = idx](
+                    const tbb::blocked_range<size_t>& r
+                )
+                {
+                    // Each TBB task gets its own index and steppers.
+                    auto idx = idx_;
+                    auto fct_stepper = e2.stepper_begin(e1.shape());
+                    auto res_stepper = e1.stepper_begin(e1.shape());
+                    // NOTE(review): first_step is declared std::size_t but used as a
+                    // boolean flag -- consider bool.
+                    std::size_t first_step = true;
+                    // #pragma omp parallel for schedule(static) firstprivate(first_step, fct_stepper,
+                    // res_stepper, idx)
+                    for (std::size_t ox = r.begin(); ox < r.end(); ++ox)
+                    {
+                        if (first_step)
+                        {
+                            is_row_major
+                                ? strided_assign_detail::idx_tools<layout_type::row_major>::nth_idx(ox, idx, max_shape)
+                                : strided_assign_detail::idx_tools<layout_type::column_major>::nth_idx(
+                                    ox,
+                                    idx,
+                                    max_shape
+                                );
+
+                            for (std::size_t i = 0; i < idx.size(); ++i)
+                            {
+                                fct_stepper.step(i + step_dim, idx[i]);
+                                res_stepper.step(i + step_dim, idx[i]);
+                            }
+                            first_step = false;
+                        }
+
+                        for (std::size_t i = 0; i < simd_size; ++i)
+                        {
+                            res_stepper.template store_simd(fct_stepper.template step_simd<value_type>());
+                        }
+                        for (std::size_t i = 0; i < simd_rest; ++i)
+                        {
+                            *(res_stepper) = conditional_cast<needs_cast, e1_value_type>(*(fct_stepper));
+                            res_stepper.step_leading();
+                            fct_stepper.step_leading();
+                        }
+
+                        // next unaligned index
+                        is_row_major
+                            ? strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape)
+                            : strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);
+
+                        fct_stepper.to_begin();
+
+                        // need to step E1 as well if not contiguous assign (e.g. view)
+                        if (!E1::contiguous_layout)
+                        {
+                            res_stepper.to_begin();
+                            for (std::size_t i = 0; i < idx.size(); ++i)
+                            {
+                                fct_stepper.step(i + step_dim, idx[i]);
+                                res_stepper.step(i + step_dim, idx[i]);
+                            }
+                        }
+                        else
+                        {
+                            for (std::size_t i = 0; i < idx.size(); ++i)
+                            {
+                                fct_stepper.step(i + step_dim, idx[i]);
+                            }
+                        }
+                    }
+                },
+                sp
+            );
+        }
+        else
+        {
+
+#endif
+            // Sequential path (also the below-threshold branch of OpenMP/TBB).
+            for (std::size_t ox = 0; ox < outer_loop_size; ++ox)
+            {
+                for (std::size_t i = 0; i < simd_size; ++i)
+                {
+                    res_stepper.store_simd(fct_stepper.template step_simd<value_type>());
+                }
+                for (std::size_t i = 0; i < simd_rest; ++i)
+                {
+                    *(res_stepper) = conditional_cast<needs_cast, e1_value_type>(*(fct_stepper));
+                    res_stepper.step_leading();
+                    fct_stepper.step_leading();
+                }
+
+                is_row_major
+                    ? strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape)
+                    : strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);
+
+                fct_stepper.to_begin();
+
+                // need to step E1 as well if not contiguous assign (e.g. view)
+                if (!E1::contiguous_layout)
+                {
+                    res_stepper.to_begin();
+                    for (std::size_t i = 0; i < idx.size(); ++i)
+                    {
+                        fct_stepper.step(i + step_dim, idx[i]);
+                        res_stepper.step(i + step_dim, idx[i]);
+                    }
+                }
+                else
+                {
+                    for (std::size_t i = 0; i < idx.size(); ++i)
+                    {
+                        fct_stepper.step(i + step_dim, idx[i]);
+                    }
+                }
+            }
+#if (defined(XTENSOR_USE_OPENMP) || defined(XTENSOR_USE_TBB)) && defined(strided_parallel_assign)
+        }
+#endif
+    }
+
+    // SIMD specialization: analyze the loop sizes and either run the strided
+    // assignment or fall back to the generic stepper assigner.
+    template <>
+    template <class E1, class E2>
+    inline void strided_loop_assigner<true>::run(E1& e1, const E2& e2)
+    {
+        strided_assign_detail::loop_sizes_t loop_sizes = strided_loop_assigner<true>::get_loop_sizes(e1, e2);
+        if (loop_sizes.can_do_strided_assign)
+        {
+            run(e1, e2, loop_sizes);
+        }
+        else
+        {
+            // trigger the fallback assigner
+            stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(e1, e2).run();
+        }
+    }
+
+    // Non-SIMD specialization: intentionally a no-op, since the two-argument
+    // overload below never produces loop sizes that reach this path.
+    template <>
+    template <class E1, class E2>
+    inline void strided_loop_assigner<false>::run(E1& /*e1*/, const E2& /*e2*/, const loop_sizes_t&)
+    {
+    }
+
+    // Non-SIMD specialization: always delegate to the stepper assigner.
+    template <>
+    template <class E1, class E2>
+    inline void strided_loop_assigner<false>::run(E1& e1, const E2& e2)
+    {
+        // trigger the fallback assigner
+        stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(e1, e2).run();
+    }
+}
+
+#endif

+ 349 - 0
3rd/numpy/include/xtensor/xaxis_iterator.hpp

@@ -0,0 +1,349 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_AXIS_ITERATOR_HPP
+#define XTENSOR_AXIS_ITERATOR_HPP
+
+#include "xstrided_view.hpp"
+
+namespace xt
+{
+
+    /******************
+     * xaxis_iterator *
+     ******************/
+
+    /**
+     * @class xaxis_iterator
+     * @brief Class for iteration over (N-1)-dimensional slices, where
+     * N is the dimension of the underlying expression
+     *
+     * If N is the number of dimensions of an expression, the xaxis_iterator
+     * iterates over (N-1)-dimensional slices oriented along the specified axis.
+     *
+     * @tparam CT the closure type of the \ref xexpression
+     */
+    template <class CT>
+    class xaxis_iterator
+    {
+    public:
+
+        using self_type = xaxis_iterator<CT>;
+
+        using xexpression_type = std::decay_t<CT>;
+        using size_type = typename xexpression_type::size_type;
+        using difference_type = typename xexpression_type::difference_type;
+        using shape_type = typename xexpression_type::shape_type;
+        // Dereferencing yields an (N-1)-dimensional strided view on the expression.
+        using value_type = xstrided_view<CT, shape_type>;
+        using reference = std::remove_reference_t<apply_cv_t<CT, value_type>>;
+        using pointer = xtl::xclosure_pointer<std::remove_reference_t<apply_cv_t<CT, value_type>>>;
+
+        using iterator_category = std::forward_iterator_tag;
+
+        template <class CTA>
+        xaxis_iterator(CTA&& e, size_type axis);
+        template <class CTA>
+        xaxis_iterator(CTA&& e, size_type axis, size_type index, size_type offset);
+
+        self_type& operator++();
+        self_type operator++(int);
+
+        reference operator*() const;
+        pointer operator->() const;
+
+        bool equal(const self_type& rhs) const;
+
+    private:
+
+        // Closure on the iterated expression (a pointer for lvalue closures,
+        // a value otherwise -- see get_storage_init).
+        using storing_type = xtl::ptr_closure_type_t<CT>;
+        mutable storing_type p_expression;
+        // Position along the iterated axis.
+        size_type m_index;
+        // Offset increment applied per step: the stride of the iterated axis.
+        size_type m_add_offset;
+        // The strided view representing the current slice.
+        value_type m_sv;
+
+        template <class T, class CTA>
+        std::enable_if_t<std::is_pointer<T>::value, T> get_storage_init(CTA&& e) const;
+
+        template <class T, class CTA>
+        std::enable_if_t<!std::is_pointer<T>::value, T> get_storage_init(CTA&& e) const;
+    };
+
+    template <class CT>
+    bool operator==(const xaxis_iterator<CT>& lhs, const xaxis_iterator<CT>& rhs);
+
+    template <class CT>
+    bool operator!=(const xaxis_iterator<CT>& lhs, const xaxis_iterator<CT>& rhs);
+
+    template <class E>
+    auto axis_begin(E&& e);
+
+    template <class E>
+    auto axis_begin(E&& e, typename std::decay_t<E>::size_type axis);
+
+    template <class E>
+    auto axis_end(E&& e);
+
+    template <class E>
+    auto axis_end(E&& e, typename std::decay_t<E>::size_type axis);
+
+    /*********************************
+     * xaxis_iterator implementation *
+     *********************************/
+
+    namespace detail
+    {
+        // Build the (N-1)-dimensional strided view obtained by dropping `axis`
+        // from the shape and strides of e, starting at the given data offset.
+        template <class CT>
+        auto derive_xstrided_view(
+            CT&& e,
+            typename std::decay_t<CT>::size_type axis,
+            typename std::decay_t<CT>::size_type offset
+        )
+        {
+            using xexpression_type = std::decay_t<CT>;
+            using shape_type = typename xexpression_type::shape_type;
+            using strides_type = typename xexpression_type::strides_type;
+
+            // Copy the shape, skipping the entry at `axis`.
+            const auto& e_shape = e.shape();
+            shape_type shape(e_shape.size() - 1);
+            auto nxt = std::copy(e_shape.cbegin(), e_shape.cbegin() + axis, shape.begin());
+            std::copy(e_shape.cbegin() + axis + 1, e_shape.end(), nxt);
+
+            // Copy the strides, skipping the entry at `axis`.
+            const auto& e_strides = e.strides();
+            strides_type strides(e_strides.size() - 1);
+            auto nxt_strides = std::copy(e_strides.cbegin(), e_strides.cbegin() + axis, strides.begin());
+            std::copy(e_strides.cbegin() + axis + 1, e_strides.end(), nxt_strides);
+
+            return strided_view(std::forward<CT>(e), std::move(shape), std::move(strides), offset, e.layout());
+        }
+    }
+    // When the closure stores a pointer (lvalue expression), take the address.
+    template <class CT>
+    template <class T, class CTA>
+    inline std::enable_if_t<std::is_pointer<T>::value, T> xaxis_iterator<CT>::get_storage_init(CTA&& e) const
+    {
+        return &e;
+    }
+
+    // When the closure stores a value (rvalue expression), copy/move it in.
+    template <class CT>
+    template <class T, class CTA>
+    inline std::enable_if_t<!std::is_pointer<T>::value, T> xaxis_iterator<CT>::get_storage_init(CTA&& e) const
+    {
+        return e;
+    }
+
+    /**
+     * @name Constructors
+     */
+    //@{
+    /**
+     * Constructs an xaxis_iterator
+     *
+     * @param e the expression to iterate over
+     * @param axis the axis to iterate over taking N-1 dimensional slices
+     */
+    template <class CT>
+    template <class CTA>
+    inline xaxis_iterator<CT>::xaxis_iterator(CTA&& e, size_type axis)
+        : xaxis_iterator(std::forward<CTA>(e), axis, 0, e.data_offset())
+    {
+    }
+
+    /**
+     * Constructs an xaxis_iterator starting at specified index and offset
+     *
+     * @param e the expression to iterate over
+     * @param axis the axis to iterate over taking N-1 dimensional slices
+     * @param index the starting index for the iterator
+     * @param offset the starting offset for the iterator
+     */
+    template <class CT>
+    template <class CTA>
+    inline xaxis_iterator<CT>::xaxis_iterator(CTA&& e, size_type axis, size_type index, size_type offset)
+        // NOTE(review): e is forwarded twice (into p_expression and into m_sv);
+        // for an rvalue CTA this relies on neither initializer moving from e --
+        // confirm against the closure semantics of storing_type / strided_view.
+        : p_expression(get_storage_init<storing_type>(std::forward<CTA>(e)))
+        , m_index(index)
+        , m_add_offset(static_cast<size_type>(e.strides()[axis]))
+        , m_sv(detail::derive_xstrided_view<CTA>(std::forward<CTA>(e), axis, offset))
+    {
+    }
+
+    //@}
+
+    /**
+     * @name Increment
+     */
+    //@{
+    /**
+     * Increments the iterator to the next position and returns it.
+     */
+    template <class CT>
+    inline auto xaxis_iterator<CT>::operator++() -> self_type&
+    {
+        // Advancing is just shifting the view's offset by the axis stride.
+        m_sv.set_offset(m_sv.data_offset() + m_add_offset);
+        ++m_index;
+        return *this;
+    }
+
+    /**
+     * Makes a copy of the iterator, increments it to the next
+     * position, and returns the copy.
+     */
+    template <class CT>
+    inline auto xaxis_iterator<CT>::operator++(int) -> self_type
+    {
+        self_type tmp(*this);
+        ++(*this);
+        return tmp;
+    }
+
+    //@}
+
+    /**
+     * @name Reference
+     */
+    //@{
+    /**
+     * Returns the strided view at the current iteration position
+     *
+     * @return a strided_view
+     */
+    template <class CT>
+    inline auto xaxis_iterator<CT>::operator*() const -> reference
+    {
+        return m_sv;
+    }
+
+    /**
+     * Returns a pointer to the strided view at the current iteration position
+     *
+     * @return a pointer to a strided_view
+     */
+    template <class CT>
+    inline auto xaxis_iterator<CT>::operator->() const -> pointer
+    {
+        return xtl::closure_pointer(operator*());
+    }
+
+    //@}
+
+    /*
+     * @name Comparisons
+     */
+    //@{
+    /**
+     * Checks equality of the xaxis_iterator and \c rhs.
+     *
+     * @param rhs the iterator to compare with
+     * @return true if the iterators are equivalent, false otherwise
+     */
+    template <class CT>
+    inline bool xaxis_iterator<CT>::equal(const self_type& rhs) const
+    {
+        return p_expression == rhs.p_expression && m_index == rhs.m_index
+               && m_sv.data_offset() == rhs.m_sv.data_offset();
+    }
+
+    /**
+     * Checks equality of the iterators.
+     *
+     * @return true if the iterators are equivalent, false otherwise
+     */
+    template <class CT>
+    inline bool operator==(const xaxis_iterator<CT>& lhs, const xaxis_iterator<CT>& rhs)
+    {
+        return lhs.equal(rhs);
+    }
+
+    /**
+     * Checks inequality of the iterators
+     * @return true if the iterators are different, false otherwise
+     */
+    template <class CT>
+    inline bool operator!=(const xaxis_iterator<CT>& lhs, const xaxis_iterator<CT>& rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    //@}
+
+    /**
+     * @name Iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the expression for axis 0
+     *
+     * @param e the expression to iterate over
+     * @return an instance of xaxis_iterator
+     */
+    template <class E>
+    inline auto axis_begin(E&& e)
+    {
+        using return_type = xaxis_iterator<xtl::closure_type_t<E>>;
+        return return_type(std::forward<E>(e), 0);
+    }
+
+    /**
+     * Returns an iterator to the first element of the expression for the specified axis
+     *
+     * @param e the expression to iterate over
+     * @param axis the axis to iterate over
+     * @return an instance of xaxis_iterator
+     */
+    template <class E>
+    inline auto axis_begin(E&& e, typename std::decay_t<E>::size_type axis)
+    {
+        using return_type = xaxis_iterator<xtl::closure_type_t<E>>;
+        return return_type(std::forward<E>(e), axis);
+    }
+
+    /**
+     * Returns an iterator to the element following the last element of
+     * the expression for axis 0
+     *
+     * @param e the expression to iterate over
+     * @return an instance of xaxis_iterator
+     */
+    template <class E>
+    inline auto axis_end(E&& e)
+    {
+        using size_type = typename std::decay_t<E>::size_type;
+        using return_type = xaxis_iterator<xtl::closure_type_t<E>>;
+        // The end position is shape[0] slices past offset 0, i.e. stride * extent.
+        return return_type(
+            std::forward<E>(e),
+            0,
+            e.shape()[0],
+            static_cast<size_type>(e.strides()[0]) * e.shape()[0]
+        );
+    }
+
+    /**
+     * Returns an iterator to the element following the last element of
+     * the expression for the specified axis
+     *
+     * @param e the expression to iterate over
+     * @param axis the axis to iterate over
+     * @return an instance of xaxis_iterator
+     */
+    template <class E>
+    inline auto axis_end(E&& e, typename std::decay_t<E>::size_type axis)
+    {
+        using size_type = typename std::decay_t<E>::size_type;
+        using return_type = xaxis_iterator<xtl::closure_type_t<E>>;
+        return return_type(
+            std::forward<E>(e),
+            axis,
+            e.shape()[axis],
+            static_cast<size_type>(e.strides()[axis]) * e.shape()[axis]
+        );
+    }
+
+    //@}
+}
+
+#endif

+ 367 - 0
3rd/numpy/include/xtensor/xaxis_slice_iterator.hpp

@@ -0,0 +1,367 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_AXIS_SLICE_ITERATOR_HPP
+#define XTENSOR_AXIS_SLICE_ITERATOR_HPP
+
+#include "xstrided_view.hpp"
+
+namespace xt
+{
+
+    /**
+     * @class xaxis_slice_iterator
+     * @brief Class for iteration over one-dimensional slices
+     *
+     * The xaxis_slice_iterator iterates over one-dimensional slices
+     * oriented along the specified axis
+     *
+     * @tparam CT the closure type of the \ref xexpression
+     */
+    template <class CT>
+    class xaxis_slice_iterator
+    {
+    public:
+
+        using self_type = xaxis_slice_iterator<CT>;
+
+        using xexpression_type = std::decay_t<CT>;
+        using size_type = typename xexpression_type::size_type;
+        using difference_type = typename xexpression_type::difference_type;
+        using shape_type = typename xexpression_type::shape_type;
+        using strides_type = typename xexpression_type::strides_type;
+        // Dereferencing yields a 1-D strided view into the underlying expression.
+        using value_type = xstrided_view<CT, shape_type>;
+        using reference = std::remove_reference_t<apply_cv_t<CT, value_type>>;
+        using pointer = xtl::xclosure_pointer<std::remove_reference_t<apply_cv_t<CT, value_type>>>;
+
+        using iterator_category = std::forward_iterator_tag;
+
+        template <class CTA>
+        xaxis_slice_iterator(CTA&& e, size_type axis);
+        template <class CTA>
+        xaxis_slice_iterator(CTA&& e, size_type axis, size_type index, size_type offset);
+
+        self_type& operator++();
+        self_type operator++(int);
+
+        reference operator*() const;
+        pointer operator->() const;
+
+        bool equal(const self_type& rhs) const;
+
+    private:
+
+        using storing_type = xtl::ptr_closure_type_t<CT>;
+        // Pointer-or-value closure on the iterated expression (see get_storage_init).
+        mutable storing_type p_expression;
+        // Current slice index; used together with p_expression for iterator equality.
+        size_type m_index;
+        // Current data offset of the slice start into the expression.
+        size_type m_offset;
+        // strides[axis] * (shape[axis] - 1): the extra jump past a completed slice.
+        size_type m_axis_stride;
+        // Lower/upper bounds on (offset % m_iter_size) marking a slice boundary.
+        size_type m_lower_shape;
+        size_type m_upper_shape;
+        // Iteration period in elements (layout dependent, see the constructor).
+        size_type m_iter_size;
+        // True when axis is the last axis (row-major) or axis 0 (column-major).
+        bool m_is_target_axis;
+        // Cached strided view; only its offset is updated on increment.
+        value_type m_sv;
+
+        template <class T, class CTA>
+        std::enable_if_t<std::is_pointer<T>::value, T> get_storage_init(CTA&& e) const;
+
+        template <class T, class CTA>
+        std::enable_if_t<!std::is_pointer<T>::value, T> get_storage_init(CTA&& e) const;
+    };
+
+    template <class CT>
+    bool operator==(const xaxis_slice_iterator<CT>& lhs, const xaxis_slice_iterator<CT>& rhs);
+
+    template <class CT>
+    bool operator!=(const xaxis_slice_iterator<CT>& lhs, const xaxis_slice_iterator<CT>& rhs);
+
+    // Forward declarations of the iterator factories defined at the bottom of this
+    // header. Fix: these were previously declared as `xaxis_slice_begin` /
+    // `xaxis_slice_end`, which does not match the `axis_slice_begin` /
+    // `axis_slice_end` definitions below — the old names were dead declarations
+    // with no corresponding definition.
+    template <class E>
+    auto axis_slice_begin(E&& e);
+
+    template <class E>
+    auto axis_slice_begin(E&& e, typename std::decay_t<E>::size_type axis);
+
+    template <class E>
+    auto axis_slice_end(E&& e);
+
+    template <class E>
+    auto axis_slice_end(E&& e, typename std::decay_t<E>::size_type axis);
+
+    /***************************************
+     * xaxis_slice_iterator implementation *
+     ***************************************/
+
+    // storing_type is a pointer: keep the address of the externally owned expression.
+    template <class CT>
+    template <class T, class CTA>
+    inline std::enable_if_t<std::is_pointer<T>::value, T>
+    xaxis_slice_iterator<CT>::get_storage_init(CTA&& e) const
+    {
+        return &e;
+    }
+
+    // storing_type is a value: store the expression by value (e is a named
+    // lvalue here, so this copies rather than moves).
+    template <class CT>
+    template <class T, class CTA>
+    inline std::enable_if_t<!std::is_pointer<T>::value, T>
+    xaxis_slice_iterator<CT>::get_storage_init(CTA&& e) const
+    {
+        return e;
+    }
+
+    /**
+     * @name Constructors
+     */
+    //@{
+    /**
+     * Constructs an xaxis_slice_iterator
+     *
+     * @param e the expression to iterate over
+     * @param axis the axis to iterate over taking one dimensional slices
+     */
+    template <class CT>
+    template <class CTA>
+    inline xaxis_slice_iterator<CT>::xaxis_slice_iterator(CTA&& e, size_type axis)
+        : xaxis_slice_iterator(std::forward<CTA>(e), axis, 0, e.data_offset())
+    {
+    }
+
+    /**
+     * Constructs an xaxis_slice_iterator starting at specified index and offset
+     *
+     * @param e the expression to iterate over
+     * @param axis the axis to iterate over taking one dimensional slices
+     * @param index the starting index for the iterator
+     * @param offset the starting offset for the iterator
+     */
+    template <class CT>
+    template <class CTA>
+    inline xaxis_slice_iterator<CT>::xaxis_slice_iterator(CTA&& e, size_type axis, size_type index, size_type offset)
+        : p_expression(get_storage_init<storing_type>(std::forward<CTA>(e)))
+        , m_index(index)
+        , m_offset(offset)
+        , m_axis_stride(static_cast<size_type>(e.strides()[axis]) * (e.shape()[axis] - 1u))
+        , m_lower_shape(0)
+        , m_upper_shape(0)
+        , m_iter_size(0)
+        , m_is_target_axis(false)
+        // NOTE(review): the view is built from std::forward<CT>(e) while the storage
+        // above was initialized from CTA; get_storage_init returns either &e or a
+        // copy of the lvalue e, so e is not moved-from here — but confirm the
+        // CT-vs-CTA mismatch and the braced lists forwarded as shape/strides types
+        // against upstream xtensor.
+        , m_sv(strided_view(
+              std::forward<CT>(e),
+              std::forward<shape_type>({e.shape()[axis]}),
+              std::forward<strides_type>({e.strides()[axis]}),
+              offset,
+              e.layout()
+          ))
+    {
+        if (e.layout() == layout_type::row_major)
+        {
+            // The target axis is the innermost axis for row-major layout.
+            m_is_target_axis = axis == e.dimension() - 1;
+            // Product of the dimensions after `axis`.
+            m_lower_shape = std::accumulate(
+                e.shape().begin() + axis + 1,
+                e.shape().end(),
+                size_t(1),
+                std::multiplies<>()
+            );
+            // Product of all dimensions except the first.
+            m_iter_size = std::accumulate(e.shape().begin() + 1, e.shape().end(), size_t(1), std::multiplies<>());
+        }
+        else
+        {
+            // The target axis is axis 0 for column-major layout.
+            m_is_target_axis = axis == 0;
+            // Product of the dimensions before `axis`.
+            m_lower_shape = std::accumulate(
+                e.shape().begin(),
+                e.shape().begin() + axis,
+                size_t(1),
+                std::multiplies<>()
+            );
+            // Product of all dimensions except the last.
+            m_iter_size = std::accumulate(e.shape().begin(), e.shape().end() - 1, size_t(1), std::multiplies<>());
+        }
+        m_upper_shape = m_lower_shape + m_axis_stride;
+    }
+
+    //@}
+
+    /**
+     * @name Increment
+     */
+    //@{
+    /**
+     * Increments the iterator to the next position and returns it.
+     */
+    template <class CT>
+    inline auto xaxis_slice_iterator<CT>::operator++() -> self_type&
+    {
+        ++m_index;
+        ++m_offset;
+        // Skip ahead by m_axis_stride when a slice boundary is reached: always for
+        // the target axis, otherwise when (offset % iter_size) falls within the
+        // [m_lower_shape, m_upper_shape] window computed in the constructor.
+        auto index_compare = (m_offset % m_iter_size);
+        if (m_is_target_axis || (m_upper_shape >= index_compare && index_compare >= m_lower_shape))
+        {
+            m_offset += m_axis_stride;
+        }
+        // The cached strided view is reused; only its start offset moves.
+        m_sv.set_offset(m_offset);
+        return *this;
+    }
+
+    /**
+     * Makes a copy of the iterator, increments it to the next
+     * position, and returns the copy.
+     */
+    template <class CT>
+    inline auto xaxis_slice_iterator<CT>::operator++(int) -> self_type
+    {
+        self_type tmp(*this);
+        ++(*this);
+        return tmp;
+    }
+
+    //@}
+
+    /**
+     * @name Reference
+     */
+    //@{
+    /**
+     * Returns the strided view at the current iteration position
+     *
+     * @return a strided_view
+     */
+    template <class CT>
+    inline auto xaxis_slice_iterator<CT>::operator*() const -> reference
+    {
+        return m_sv;
+    }
+
+    /**
+     * Returns a pointer to the strided view at the current iteration position
+     *
+     * @return a pointer to a strided_view
+     */
+    template <class CT>
+    inline auto xaxis_slice_iterator<CT>::operator->() const -> pointer
+    {
+        return xtl::closure_pointer(operator*());
+    }
+
+    //@}
+
+    /**
+     * @name Comparisons
+     */
+    //@{
+    /**
+     * Checks equality of the xaxis_slice_iterator and \c rhs.
+     *
+     * @return true if the iterators are equivalent, false otherwise
+     */
+    template <class CT>
+    inline bool xaxis_slice_iterator<CT>::equal(const self_type& rhs) const
+    {
+        // Two iterators match when they refer to the same expression closure
+        // and the same slice index (the offset is derived from the index).
+        return p_expression == rhs.p_expression && m_index == rhs.m_index;
+    }
+
+    /**
+     * Checks equality of the iterators.
+     *
+     * @return true if the iterators are equivalent, false otherwise
+     */
+    template <class CT>
+    inline bool operator==(const xaxis_slice_iterator<CT>& lhs, const xaxis_slice_iterator<CT>& rhs)
+    {
+        return lhs.equal(rhs);
+    }
+
+    /**
+     * Checks inequality of the iterators
+     *
+     * @return true if the iterators are different, false otherwise
+     */
+    template <class CT>
+    inline bool operator!=(const xaxis_slice_iterator<CT>& lhs, const xaxis_slice_iterator<CT>& rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    //@}
+
+    /**
+     * @name Iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the expression for axis 0
+     *
+     * @param e the expression to iterate over
+     * @return an instance of xaxis_slice_iterator
+     */
+    template <class E>
+    inline auto axis_slice_begin(E&& e)
+    {
+        using return_type = xaxis_slice_iterator<xtl::closure_type_t<E>>;
+        return return_type(std::forward<E>(e), 0);
+    }
+
+    /**
+     * Returns an iterator to the first element of the expression for the specified axis
+     *
+     * @param e the expression to iterate over
+     * @param axis the axis to iterate over
+     * @return an instance of xaxis_slice_iterator
+     */
+    template <class E>
+    inline auto axis_slice_begin(E&& e, typename std::decay_t<E>::size_type axis)
+    {
+        using return_type = xaxis_slice_iterator<xtl::closure_type_t<E>>;
+        return return_type(std::forward<E>(e), axis, 0, e.data_offset());
+    }
+
+    /**
+     * Returns an iterator to the element following the last element of
+     * the expression for axis 0
+     *
+     * @param e the expression to iterate over
+     * @return an instance of xaxis_slice_iterator
+     */
+    template <class E>
+    inline auto axis_slice_end(E&& e)
+    {
+        using return_type = xaxis_slice_iterator<xtl::closure_type_t<E>>;
+        // end index = number of slices along axis 0 (product of the remaining
+        // dimensions); end offset = total number of elements.
+        return return_type(
+            std::forward<E>(e),
+            0,
+            std::accumulate(e.shape().begin() + 1, e.shape().end(), size_t(1), std::multiplies<>()),
+            e.size()
+        );
+    }
+
+    /**
+     * Returns an iterator to the element following the last element of
+     * the expression for the specified axis
+     *
+     * @param e the expression to iterate over
+     * @param axis the axis to iterate over
+     * @return an instance of xaxis_slice_iterator
+     */
+    template <class E>
+    inline auto axis_slice_end(E&& e, typename std::decay_t<E>::size_type axis)
+    {
+        using return_type = xaxis_slice_iterator<xtl::closure_type_t<E>>;
+        // Product of the dimensions before `axis`; folded below with the
+        // dimensions after `axis`, giving the total number of slices.
+        auto index_sum = std::accumulate(
+            e.shape().begin(),
+            e.shape().begin() + axis,
+            size_t(1),
+            std::multiplies<>()
+        );
+        // NOTE(review): the end offset `e.size() + axis` differs from the axis-0
+        // overload (which uses plain e.size()); confirm the `+ axis` term is
+        // intended and matches upstream xtensor.
+        return return_type(
+            std::forward<E>(e),
+            axis,
+            std::accumulate(e.shape().begin() + axis + 1, e.shape().end(), index_sum, std::multiplies<>()),
+            e.size() + axis
+        );
+    }
+
+    //@}
+}
+
+#endif

+ 533 - 0
3rd/numpy/include/xtensor/xblockwise_reducer.hpp

@@ -0,0 +1,533 @@
+#ifndef XTENSOR_XBLOCKWISE_REDUCER_HPP
+#define XTENSOR_XBLOCKWISE_REDUCER_HPP
+
+#include "xblockwise_reducer_functors.hpp"
+#include "xmultiindex_iterator.hpp"
+#include "xreducer.hpp"
+#include "xshape.hpp"
+#include "xtl/xclosure.hpp"
+#include "xtl/xsequence.hpp"
+
+namespace xt
+{
+
+    /**
+     * @class xblockwise_reducer
+     * @brief Lazily reduces an expression chunk by chunk.
+     *
+     * The expression is tiled into blocks via an xchunked_view; each output
+     * chunk is computed by reducing the contributing input chunks with the
+     * functor F and merging the per-block results (see assign_to).
+     *
+     * @tparam CT the closure type of the reduced \ref xexpression
+     * @tparam F the per-block reduction functor type
+     * @tparam X the axes container type
+     * @tparam O the reducer options type (e.g. may contain xt::keep_dims)
+     */
+    template <class CT, class F, class X, class O>
+    class xblockwise_reducer
+    {
+    public:
+
+        using self_type = xblockwise_reducer<CT, F, X, O>;
+        using raw_options_type = std::decay_t<O>;
+        // Compile-time flag: whether reduced axes are kept with extent 1.
+        using keep_dims = xtl::mpl::contains<raw_options_type, xt::keep_dims_type>;
+        using xexpression_type = std::decay_t<CT>;
+        using shape_type = typename xreducer_shape_type<typename xexpression_type::shape_type, std::decay_t<X>, keep_dims>::type;
+        using functor_type = F;
+        using value_type = typename functor_type::value_type;
+        using input_shape_type = typename xexpression_type::shape_type;
+        using input_chunk_index_type = filter_fixed_shape_t<input_shape_type>;
+        using input_grid_strides = filter_fixed_shape_t<input_shape_type>;
+        using axes_type = X;
+        using chunk_shape_type = filter_fixed_shape_t<shape_type>;
+
+
+        template <class E, class BS, class XX, class OO, class FF>
+        xblockwise_reducer(E&& e, BS&& block_shape, XX&& axes, OO&& options, FF&& functor);
+
+        const input_shape_type& input_shape() const;
+        const axes_type& axes() const;
+
+        std::size_t dimension() const;
+
+        const shape_type& shape() const;
+
+        const chunk_shape_type& chunk_shape() const;
+
+        template <class R>
+        void assign_to(R& result) const;
+
+    private:
+
+        using mapping_type = filter_fixed_shape_t<shape_type>;
+        using input_chunked_view_type = xchunked_view<const std::decay_t<CT>&>;
+        using input_const_chunked_iterator_type = typename input_chunked_view_type::const_chunk_iterator;
+        using input_chunk_range_type = std::array<xmultiindex_iterator<input_chunk_index_type>, 2>;
+
+        template <class CI>
+        void assign_to_chunk(CI& result_chunk_iter) const;
+
+        template <class CI>
+        input_chunk_range_type compute_input_chunk_range(CI& result_chunk_iter) const;
+
+        input_const_chunked_iterator_type get_input_chunk_iter(input_chunk_index_type input_chunk_index) const;
+        void init_shapes();
+
+        CT m_e;                                          // closure on the reduced expression
+        xchunked_view<const std::decay_t<CT>&> m_e_chunked_view;  // chunked (tiled) view over m_e
+        axes_type m_axes;                                // axes being reduced
+        raw_options_type m_options;                      // reducer options
+        functor_type m_functor;                          // per-block reduction functor
+        shape_type m_result_shape;                       // shape of the reduced result
+        chunk_shape_type m_result_chunk_shape;           // chunk shape of the result
+        mapping_type m_mapping;                          // result axis -> input axis mapping
+        input_grid_strides m_input_grid_strides;         // row-major strides over the input chunk grid
+    };
+
+    /**
+     * Constructs a blockwise reducer.
+     *
+     * @param e the expression to reduce
+     * @param block_shape the block/chunk shape used to tile @p e
+     * @param axes the axes to reduce over
+     * @param options the reducer options (e.g. xt::keep_dims)
+     * @param functor the per-block reduction functor
+     */
+    template <class CT, class F, class X, class O>
+    template <class E, class BS, class XX, class OO, class FF>
+    xblockwise_reducer<CT, F, X, O>::xblockwise_reducer(E&& e, BS&& block_shape, XX&& axes, OO&& options, FF&& functor)
+        : m_e(std::forward<E>(e))
+        , m_e_chunked_view(m_e, std::forward<BS>(block_shape))
+        , m_axes(std::forward<XX>(axes))
+        , m_options(std::forward<OO>(options))
+        , m_functor(std::forward<FF>(functor))
+        , m_result_shape()
+        , m_result_chunk_shape()
+        , m_mapping()
+        , m_input_grid_strides()
+    {
+        init_shapes();
+        resize_container(m_input_grid_strides, m_e.dimension());
+        std::size_t stride = 1;
+
+        // Build row-major strides over the chunk grid (last dimension fastest),
+        // used to linearize a chunk multi-index in get_input_chunk_iter.
+        for (std::size_t i = m_input_grid_strides.size(); i != 0; --i)
+        {
+            m_input_grid_strides[i - 1] = stride;
+            stride *= m_e_chunked_view.grid_shape()[i - 1];
+        }
+    }
+
+    /** Returns the shape of the underlying (input) expression. */
+    template <class CT, class F, class X, class O>
+    inline auto xblockwise_reducer<CT, F, X, O>::input_shape() const -> const input_shape_type&
+    {
+        return m_e.shape();
+    }
+
+    /** Returns the axes being reduced. */
+    template <class CT, class F, class X, class O>
+    inline auto xblockwise_reducer<CT, F, X, O>::axes() const -> const axes_type&
+    {
+        return m_axes;
+    }
+
+    /** Returns the number of dimensions of the reduced result. */
+    template <class CT, class F, class X, class O>
+    inline std::size_t xblockwise_reducer<CT, F, X, O>::dimension() const
+    {
+        return m_result_shape.size();
+    }
+
+    /** Returns the shape of the reduced result. */
+    template <class CT, class F, class X, class O>
+    inline auto xblockwise_reducer<CT, F, X, O>::shape() const -> const shape_type&
+    {
+        return m_result_shape;
+    }
+
+    /** Returns the chunk shape of the reduced result. */
+    template <class CT, class F, class X, class O>
+    inline auto xblockwise_reducer<CT, F, X, O>::chunk_shape() const -> const chunk_shape_type&
+    {
+        return m_result_chunk_shape;
+    }
+
+    /**
+     * Evaluates the blockwise reduction and assigns it, chunk by chunk, to @p result.
+     */
+    template <class CT, class F, class X, class O>
+    template <class R>
+    inline void xblockwise_reducer<CT, F, X, O>::assign_to(R& result) const
+    {
+        // View the result with the result chunk shape and fill each chunk independently.
+        auto result_chunked_view = as_chunked(result, m_result_chunk_shape);
+        for (auto chunk_iter = result_chunked_view.chunk_begin(); chunk_iter != result_chunked_view.chunk_end();
+             ++chunk_iter)
+        {
+            assign_to_chunk(chunk_iter);
+        }
+    }
+
+    // Builds a chunk iterator over the input pointing at `input_chunk_index`,
+    // linearizing the multi-index with the precomputed grid strides.
+    template <class CT, class F, class X, class O>
+    auto xblockwise_reducer<CT, F, X, O>::get_input_chunk_iter(input_chunk_index_type input_chunk_index) const
+        -> input_const_chunked_iterator_type
+    {
+        std::size_t chunk_linear_index = 0;
+        for (std::size_t i = 0; i < m_e_chunked_view.dimension(); ++i)
+        {
+            chunk_linear_index += input_chunk_index[i] * m_input_grid_strides[i];
+        }
+        return input_const_chunked_iterator_type(m_e_chunked_view, std::move(input_chunk_index), chunk_linear_index);
+    }
+
+    // Computes one output chunk: reduces every contributing input chunk with the
+    // functor and merges the per-block results into `*result_chunk_iter`.
+    template <class CT, class F, class X, class O>
+    template <class CI>
+    void xblockwise_reducer<CT, F, X, O>::assign_to_chunk(CI& result_chunk_iter) const
+    {
+        auto result_chunk_view = *result_chunk_iter;
+        auto reduction_variable = m_functor.reduction_variable(result_chunk_view);
+
+        // get the range of input chunks we need to compute the desired output chunk
+        auto range = compute_input_chunk_range(result_chunk_iter);
+
+        // iterate over input chunk (indices)
+        auto first = true;
+        // std::for_each(std::get<0>(range), std::get<1>(range), [&](auto && input_chunk_index)
+        auto iter = std::get<0>(range);
+        while (iter != std::get<1>(range))
+        {
+            const auto& input_chunk_index = *iter;
+            // get input chunk iterator from chunk index
+            auto chunked_input_iter = this->get_input_chunk_iter(input_chunk_index);
+            auto input_chunk_view = *chunked_input_iter;
+
+            // compute the per block result
+            auto block_res = m_functor.compute(input_chunk_view, m_axes, m_options);
+
+            // merge the block result into the chunk's accumulated result
+            m_functor.merge(block_res, first, result_chunk_view, reduction_variable);
+            first = false;
+            ++iter;
+        }
+
+        // finalize (i.e. something like normalization)
+        m_functor.finalize(reduction_variable, result_chunk_view, *this);
+    }
+
+    // Computes the half-open range of input chunk indices contributing to the
+    // output chunk referenced by `result_chunk_iter`.
+    template <class CT, class F, class X, class O>
+    template <class CI>
+    auto xblockwise_reducer<CT, F, X, O>::compute_input_chunk_range(CI& result_chunk_iter) const
+        -> input_chunk_range_type
+    {
+        // Default to the full chunk grid: along reduced axes, every chunk contributes.
+        auto input_chunks_begin = xtl::make_sequence<input_chunk_index_type>(m_e_chunked_view.dimension(), 0);
+        auto input_chunks_end = xtl::make_sequence<input_chunk_index_type>(m_e_chunked_view.dimension());
+
+        XTENSOR_ASSERT(input_chunks_begin.size() == m_e_chunked_view.dimension());
+        XTENSOR_ASSERT(input_chunks_end.size() == m_e_chunked_view.dimension());
+
+        std::copy(
+            m_e_chunked_view.grid_shape().begin(),
+            m_e_chunked_view.grid_shape().end(),
+            input_chunks_end.begin()
+        );
+
+        const auto& chunk_index = result_chunk_iter.chunk_index();
+        // Pin every non-reduced axis (result extent != 1) to the output chunk's
+        // index, so exactly one input chunk is visited along that axis.
+        for (std::size_t result_ax_index = 0; result_ax_index < m_result_shape.size(); ++result_ax_index)
+        {
+            if (m_result_shape[result_ax_index] != 1)
+            {
+                const auto input_ax_index = m_mapping[result_ax_index];
+                input_chunks_begin[input_ax_index] = chunk_index[result_ax_index];
+                input_chunks_end[input_ax_index] = chunk_index[result_ax_index] + 1;
+            }
+        }
+        return input_chunk_range_type{
+            multiindex_iterator_begin<input_chunk_index_type>(input_chunks_begin, input_chunks_end),
+            multiindex_iterator_end<input_chunk_index_type>(input_chunks_begin, input_chunks_end)
+        };
+    }
+
+    // Computes the result shape, the result chunk shape and the result->input
+    // axis mapping, honoring the keep_dims option.
+    template <class CT, class F, class X, class O>
+    void xblockwise_reducer<CT, F, X, O>::init_shapes()
+    {
+        const auto& shape = m_e.shape();
+        const auto dimension = m_e.dimension();
+        const auto& block_shape = m_e_chunked_view.chunk_shape();
+        if (xtl::mpl::contains<raw_options_type, xt::keep_dims_type>::value)
+        {
+            // keep_dims: rank is preserved; reduced axes collapse to extent 1.
+            resize_container(m_result_shape, dimension);
+            resize_container(m_result_chunk_shape, dimension);
+            resize_container(m_mapping, dimension);
+            for (std::size_t i = 0; i < dimension; ++i)
+            {
+                m_mapping[i] = i;
+                if (std::find(m_axes.begin(), m_axes.end(), i) == m_axes.end())
+                {
+                    // i not in m_axes!
+                    m_result_shape[i] = shape[i];
+                    m_result_chunk_shape[i] = block_shape[i];
+                }
+                else
+                {
+                    m_result_shape[i] = 1;
+                    m_result_chunk_shape[i] = 1;
+                }
+            }
+        }
+        else
+        {
+            // Reduced axes are dropped from the result.
+            const auto result_dim = dimension - m_axes.size();
+            resize_container(m_result_shape, result_dim);
+            resize_container(m_result_chunk_shape, result_dim);
+            resize_container(m_mapping, result_dim);
+
+            for (std::size_t i = 0, idx = 0; i < dimension; ++i)
+            {
+                if (std::find(m_axes.begin(), m_axes.end(), i) == m_axes.end())
+                {
+                    // i not in axes!
+                    m_result_shape[idx] = shape[i];
+                    m_result_chunk_shape[idx] = block_shape[i];
+                    m_mapping[idx] = i;
+                    ++idx;
+                }
+            }
+        }
+    }
+
+    /**
+     * Factory creating an xblockwise_reducer, deducing its template parameters
+     * from the arguments (the expression is held by const closure).
+     */
+    template <class E, class CS, class A, class O, class FF>
+    inline auto blockwise_reducer(E&& e, CS&& chunk_shape, A&& axes, O&& raw_options, FF&& functor)
+    {
+        using functor_type = std::decay_t<FF>;
+        using closure_type = xtl::const_closure_type_t<E>;
+        using axes_type = std::decay_t<A>;
+
+        return xblockwise_reducer<closure_type, functor_type, axes_type, O>(
+            std::forward<E>(e),
+            std::forward<CS>(chunk_shape),
+            std::forward<A>(axes),
+            std::forward<O>(raw_options),
+            std::forward<FF>(functor)
+        );
+    }
+
+    namespace blockwise
+    {
+
+// Generates the blockwise reducer entry points for one reduction function:
+//  - FNAME(e, block_shape, axes, options)  with axes given as a container,
+//  - FNAME(e, block_shape, axis, options)  with a single integral axis,
+//  - FNAME(e, block_shape, options)        reducing over all axes,
+//  - FNAME(e, block_shape, {a, b}, ...)    with axes given as a braced list.
+// T optionally selects the internal computation type of the functor.
+#define XTENSOR_BLOCKWISE_REDUCER_FUNC(FNAME, FUNCTOR)                                                        \
+    template <                                                                                                \
+        class T = void,                                                                                       \
+        class E,                                                                                              \
+        class BS,                                                                                             \
+        class X,                                                                                              \
+        class O = DEFAULT_STRATEGY_REDUCERS,                                                                  \
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::negation<xtl::is_integral<std::decay_t<X>>>)> \
+    auto FNAME(E&& e, BS&& block_shape, X&& axes, O options = O())                                            \
+    {                                                                                                         \
+        using input_expression_type = std::decay_t<E>;                                                        \
+        using functor_type = FUNCTOR<typename input_expression_type::value_type, T>;                          \
+        return blockwise_reducer(                                                                             \
+            std::forward<E>(e),                                                                               \
+            std::forward<BS>(block_shape),                                                                    \
+            std::forward<X>(axes),                                                                            \
+            std::forward<O>(options),                                                                         \
+            functor_type()                                                                                    \
+        );                                                                                                    \
+    }                                                                                                         \
+    template <                                                                                                \
+        class T = void,                                                                                       \
+        class E,                                                                                              \
+        class BS,                                                                                             \
+        class X,                                                                                              \
+        class O = DEFAULT_STRATEGY_REDUCERS,                                                                  \
+        XTL_REQUIRES(xtl::is_integral<std::decay_t<X>>)>                                                      \
+    auto FNAME(E&& e, BS&& block_shape, X axis, O options = O())                                              \
+    {                                                                                                         \
+        std::array<X, 1> axes{axis};                                                                          \
+        using input_expression_type = std::decay_t<E>;                                                        \
+        using functor_type = FUNCTOR<typename input_expression_type::value_type, T>;                          \
+        return blockwise_reducer(                                                                             \
+            std::forward<E>(e),                                                                               \
+            std::forward<BS>(block_shape),                                                                    \
+            axes,                                                                                             \
+            std::forward<O>(options),                                                                         \
+            functor_type()                                                                                    \
+        );                                                                                                    \
+    }                                                                                                         \
+    template <                                                                                                \
+        class T = void,                                                                                       \
+        class E,                                                                                              \
+        class BS,                                                                                             \
+        class O = DEFAULT_STRATEGY_REDUCERS,                                                                  \
+        XTL_REQUIRES(is_reducer_options<O>, xtl::negation<xtl::is_integral<std::decay_t<O>>>)>                \
+    auto FNAME(E&& e, BS&& block_shape, O options = O())                                                      \
+    {                                                                                                         \
+        using input_expression_type = std::decay_t<E>;                                                        \
+        using axes_type = filter_fixed_shape_t<typename input_expression_type::shape_type>;                   \
+        axes_type axes = xtl::make_sequence<axes_type>(e.dimension());                                        \
+        XTENSOR_ASSERT(axes.size() == e.dimension());                                                         \
+        std::iota(axes.begin(), axes.end(), 0);                                                               \
+        using functor_type = FUNCTOR<typename input_expression_type::value_type, T>;                          \
+        return blockwise_reducer(                                                                             \
+            std::forward<E>(e),                                                                               \
+            std::forward<BS>(block_shape),                                                                    \
+            std::move(axes),                                                                                  \
+            std::forward<O>(options),                                                                         \
+            functor_type()                                                                                    \
+        );                                                                                                    \
+    }                                                                                                         \
+    template <class T = void, class E, class BS, class I, std::size_t N, class O = DEFAULT_STRATEGY_REDUCERS> \
+    auto FNAME(E&& e, BS&& block_shape, const I(&axes)[N], O options = O())                                   \
+    {                                                                                                         \
+        using input_expression_type = std::decay_t<E>;                                                        \
+        using functor_type = FUNCTOR<typename input_expression_type::value_type, T>;                          \
+        using axes_type = std::array<std::size_t, N>;                                                         \
+        auto ax = xt::forward_normalize<axes_type>(e, axes);                                                  \
+        return blockwise_reducer(                                                                             \
+            std::forward<E>(e),                                                                               \
+            std::forward<BS>(block_shape),                                                                    \
+            std::move(ax),                                                                                    \
+            std::forward<O>(options),                                                                         \
+            functor_type()                                                                                    \
+        );                                                                                                    \
+    }
+        // Instantiate the standard blockwise reducers.
+        XTENSOR_BLOCKWISE_REDUCER_FUNC(sum, xt::detail::blockwise::sum_functor)
+        XTENSOR_BLOCKWISE_REDUCER_FUNC(prod, xt::detail::blockwise::prod_functor)
+        XTENSOR_BLOCKWISE_REDUCER_FUNC(amin, xt::detail::blockwise::amin_functor)
+        XTENSOR_BLOCKWISE_REDUCER_FUNC(amax, xt::detail::blockwise::amax_functor)
+        XTENSOR_BLOCKWISE_REDUCER_FUNC(mean, xt::detail::blockwise::mean_functor)
+        XTENSOR_BLOCKWISE_REDUCER_FUNC(variance, xt::detail::blockwise::variance_functor)
+        XTENSOR_BLOCKWISE_REDUCER_FUNC(stddev, xt::detail::blockwise::stddev_functor)
+
+#undef XTENSOR_BLOCKWISE_REDUCER_FUNC
+
+
+// norm reducers do *not* allow one to pass a template
+// parameter to specify the internal computation type
+// Generates the free-function entry points for a blockwise norm reducer.
+// The norm functors fix their internal computation type themselves, so no
+// extra template parameter is exposed. Four overloads are produced:
+//   1. axes given as a container,
+//   2. a single integral axis,
+//   3. no axes (reduce over every dimension),
+//   4. axes given as a braced initializer list.
+#define XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC(FNAME, FUNCTOR) \
+    template < \
+        class E, \
+        class BS, \
+        class X, \
+        class O = DEFAULT_STRATEGY_REDUCERS, \
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::negation<xtl::is_integral<std::decay_t<X>>>)> \
+    auto FNAME(E&& e, BS&& block_shape, X&& axes, O options = O()) \
+    { \
+        using expr_t = std::decay_t<E>; \
+        using functor_t = FUNCTOR<typename expr_t::value_type>; \
+        return blockwise_reducer( \
+            std::forward<E>(e), \
+            std::forward<BS>(block_shape), \
+            std::forward<X>(axes), \
+            std::forward<O>(options), \
+            functor_t()); \
+    } \
+    template <class E, class BS, class X, class O = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(xtl::is_integral<std::decay_t<X>>)> \
+    auto FNAME(E&& e, BS&& block_shape, X axis, O options = O()) \
+    { \
+        std::array<X, 1> axis_arr{axis}; \
+        using expr_t = std::decay_t<E>; \
+        using functor_t = FUNCTOR<typename expr_t::value_type>; \
+        return blockwise_reducer( \
+            std::forward<E>(e), \
+            std::forward<BS>(block_shape), \
+            axis_arr, \
+            std::forward<O>(options), \
+            functor_t()); \
+    } \
+    template < \
+        class E, \
+        class BS, \
+        class O = DEFAULT_STRATEGY_REDUCERS, \
+        XTL_REQUIRES(is_reducer_options<O>, xtl::negation<xtl::is_integral<std::decay_t<O>>>)> \
+    auto FNAME(E&& e, BS&& block_shape, O options = O()) \
+    { \
+        using expr_t = std::decay_t<E>; \
+        using axes_t = filter_fixed_shape_t<typename expr_t::shape_type>; \
+        axes_t all_axes = xtl::make_sequence<axes_t>(e.dimension()); \
+        XTENSOR_ASSERT(all_axes.size() == e.dimension()); \
+        std::iota(all_axes.begin(), all_axes.end(), 0); \
+        using functor_t = FUNCTOR<typename expr_t::value_type>; \
+        return blockwise_reducer( \
+            std::forward<E>(e), \
+            std::forward<BS>(block_shape), \
+            std::move(all_axes), \
+            std::forward<O>(options), \
+            functor_t()); \
+    } \
+    template <class E, class BS, class I, std::size_t N, class O = DEFAULT_STRATEGY_REDUCERS> \
+    auto FNAME(E&& e, BS&& block_shape, const I(&axes)[N], O options = O()) \
+    { \
+        using expr_t = std::decay_t<E>; \
+        using functor_t = FUNCTOR<typename expr_t::value_type>; \
+        using axes_t = std::array<std::size_t, N>; \
+        auto normalized = xt::forward_normalize<axes_t>(e, axes); \
+        return blockwise_reducer( \
+            std::forward<E>(e), \
+            std::forward<BS>(block_shape), \
+            std::move(normalized), \
+            std::forward<O>(options), \
+            functor_t()); \
+    }
+        XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC(norm_l0, xt::detail::blockwise::norm_l0_functor)
+        XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC(norm_l1, xt::detail::blockwise::norm_l1_functor)
+        XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC(norm_l2, xt::detail::blockwise::norm_l2_functor)
+        XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC(norm_sq, xt::detail::blockwise::norm_sq_functor)
+        XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC(norm_linf, xt::detail::blockwise::norm_linf_functor)
+
+#undef XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC
+
+
+// Same generator as above, but for the p-parameterized norms: every entry
+// point takes an extra `double p` that is forwarded to the functor's
+// constructor. The same four axis overloads are produced.
+#define XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC(FNAME, FUNCTOR) \
+    template < \
+        class E, \
+        class BS, \
+        class X, \
+        class O = DEFAULT_STRATEGY_REDUCERS, \
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::negation<xtl::is_integral<std::decay_t<X>>>)> \
+    auto FNAME(E&& e, BS&& block_shape, double p, X&& axes, O options = O()) \
+    { \
+        using expr_t = std::decay_t<E>; \
+        using functor_t = FUNCTOR<typename expr_t::value_type>; \
+        return blockwise_reducer( \
+            std::forward<E>(e), \
+            std::forward<BS>(block_shape), \
+            std::forward<X>(axes), \
+            std::forward<O>(options), \
+            functor_t(p)); \
+    } \
+    template <class E, class BS, class X, class O = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(xtl::is_integral<std::decay_t<X>>)> \
+    auto FNAME(E&& e, BS&& block_shape, double p, X axis, O options = O()) \
+    { \
+        std::array<X, 1> axis_arr{axis}; \
+        using expr_t = std::decay_t<E>; \
+        using functor_t = FUNCTOR<typename expr_t::value_type>; \
+        return blockwise_reducer( \
+            std::forward<E>(e), \
+            std::forward<BS>(block_shape), \
+            axis_arr, \
+            std::forward<O>(options), \
+            functor_t(p)); \
+    } \
+    template < \
+        class E, \
+        class BS, \
+        class O = DEFAULT_STRATEGY_REDUCERS, \
+        XTL_REQUIRES(is_reducer_options<O>, xtl::negation<xtl::is_integral<std::decay_t<O>>>)> \
+    auto FNAME(E&& e, BS&& block_shape, double p, O options = O()) \
+    { \
+        using expr_t = std::decay_t<E>; \
+        using axes_t = filter_fixed_shape_t<typename expr_t::shape_type>; \
+        axes_t all_axes = xtl::make_sequence<axes_t>(e.dimension()); \
+        XTENSOR_ASSERT(all_axes.size() == e.dimension()); \
+        std::iota(all_axes.begin(), all_axes.end(), 0); \
+        using functor_t = FUNCTOR<typename expr_t::value_type>; \
+        return blockwise_reducer( \
+            std::forward<E>(e), \
+            std::forward<BS>(block_shape), \
+            std::move(all_axes), \
+            std::forward<O>(options), \
+            functor_t(p)); \
+    } \
+    template <class E, class BS, class I, std::size_t N, class O = DEFAULT_STRATEGY_REDUCERS> \
+    auto FNAME(E&& e, BS&& block_shape, double p, const I(&axes)[N], O options = O()) \
+    { \
+        using expr_t = std::decay_t<E>; \
+        using functor_t = FUNCTOR<typename expr_t::value_type>; \
+        using axes_t = std::array<std::size_t, N>; \
+        auto normalized = xt::forward_normalize<axes_t>(e, axes); \
+        return blockwise_reducer( \
+            std::forward<E>(e), \
+            std::forward<BS>(block_shape), \
+            std::move(normalized), \
+            std::forward<O>(options), \
+            functor_t(p)); \
+    }
+
+        XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC(norm_lp_to_p, xt::detail::blockwise::norm_lp_to_p_functor);
+        XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC(norm_lp, xt::detail::blockwise::norm_lp_functor);
+
+#undef XTENSOR_BLOCKWISE_NORM_REDUCER_FUNC
+    }
+
+}
+
+#endif

+ 503 - 0
3rd/numpy/include/xtensor/xblockwise_reducer_functors.hpp

@@ -0,0 +1,503 @@
+#ifndef XTENSOR_XBLOCKWISE_REDUCER_FUNCTORS_HPP
+#define XTENSOR_XBLOCKWISE_REDUCER_FUNCTORS_HPP
+
+
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <typeinfo>
+
+#include "xarray.hpp"
+#include "xbuilder.hpp"
+#include "xchunked_array.hpp"
+#include "xchunked_assign.hpp"
+#include "xchunked_view.hpp"
+#include "xexpression.hpp"
+#include "xmath.hpp"
+#include "xnorm.hpp"
+#include "xreducer.hpp"
+#include "xtl/xclosure.hpp"
+#include "xtl/xsequence.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+    namespace detail
+    {
+        namespace blockwise
+        {
+
+            // Tag type used as the "no state" reduction variable by functors
+            // that carry nothing across block merges.
+            struct empty_reduction_variable
+            {
+            };
+
+            // Base for functors whose merge step needs no extra state and
+            // whose result needs no post-processing: provides a no-op
+            // reduction_variable() and a no-op finalize().
+            struct simple_functor_base
+            {
+                template <class E>
+                auto reduction_variable(const E&) const
+                {
+                    return empty_reduction_variable();
+                }
+
+                template <class MR, class E, class R>
+                void finalize(const MR&, E&, const R&) const
+                {
+                }
+            };
+
+            // Blockwise sum: each block is reduced with xt::sum and the
+            // partial results are accumulated with +=.
+            template <class T_E, class T_I = void>
+            struct sum_functor : public simple_functor_base
+            {
+                // Result type deduced from xt::sum<T_I> on an xarray<T_E>.
+                using value_type = typename std::decay_t<decltype(xt::sum<T_I>(std::declval<xarray<T_E>>()))>::value_type;
+
+                // Reduce one block over the requested axes.
+                template <class E, class A, class O>
+                auto compute(const E& block, const A& axes, const O& options) const
+                {
+                    return xt::sum<value_type>(block, axes, options);
+                }
+
+                // Fold a per-block partial into the running result: the first
+                // block seeds it, later blocks are added.
+                template <class BR, class E, class MR>
+                auto merge(const BR& partial, bool first, E& result, MR&) const
+                {
+                    if (!first)
+                    {
+                        xt::noalias(result) += partial;
+                    }
+                    else
+                    {
+                        xt::noalias(result) = partial;
+                    }
+                }
+            };
+
+            // Blockwise product: each block is reduced with xt::prod and the
+            // partial results are multiplied together.
+            template <class T_E, class T_I = void>
+            struct prod_functor : public simple_functor_base
+            {
+                // Deduce the result type from xt::prod<T_I> — previously this
+                // was deduced from xt::sum<T_I>, which is inconsistent with
+                // the operation actually performed in compute().
+                using value_type = typename std::decay_t<decltype(xt::prod<T_I>(std::declval<xarray<T_E>>()))>::value_type;
+
+                // Reduce one block over the requested axes.
+                template <class E, class A, class O>
+                auto compute(const E& input, const A& axes, const O& options) const
+                {
+                    return xt::prod<value_type>(input, axes, options);
+                }
+
+                // Fold a per-block partial into the running result: the first
+                // block seeds it, later blocks are multiplied in.
+                template <class BR, class E, class MR>
+                auto merge(const BR& block_result, bool first, E& result, MR&) const
+                {
+                    if (first)
+                    {
+                        xt::noalias(result) = block_result;
+                    }
+                    else
+                    {
+                        xt::noalias(result) *= block_result;
+                    }
+                }
+            };
+
+            // Blockwise minimum: per-block xt::amin, merged with an
+            // element-wise minimum.
+            template <class T_E, class T_I = void>
+            struct amin_functor : public simple_functor_base
+            {
+                // Result type deduced from xt::amin<T_I> on an xarray<T_E>.
+                using value_type = typename std::decay_t<decltype(xt::amin<T_I>(std::declval<xarray<T_E>>()))>::value_type;
+
+                // Reduce one block over the requested axes.
+                template <class E, class A, class O>
+                auto compute(const E& block, const A& axes, const O& options) const
+                {
+                    return xt::amin(block, axes, options);
+                }
+
+                // First block seeds the result; later blocks are combined
+                // with xt::minimum.
+                template <class BR, class E, class MR>
+                auto merge(const BR& partial, bool first, E& result, MR&) const
+                {
+                    if (!first)
+                    {
+                        xt::noalias(result) = xt::minimum(partial, result);
+                    }
+                    else
+                    {
+                        xt::noalias(result) = partial;
+                    }
+                }
+            };
+
+            // Blockwise maximum: per-block xt::amax, merged with an
+            // element-wise maximum.
+            template <class T_E, class T_I = void>
+            struct amax_functor : public simple_functor_base
+            {
+                // Result type deduced from xt::amax<T_I> on an xarray<T_E>.
+                using value_type = typename std::decay_t<decltype(xt::amax<T_I>(std::declval<xarray<T_E>>()))>::value_type;
+
+                // Reduce one block over the requested axes.
+                template <class E, class A, class O>
+                auto compute(const E& block, const A& axes, const O& options) const
+                {
+                    return xt::amax(block, axes, options);
+                }
+
+                // First block seeds the result; later blocks are combined
+                // with xt::maximum.
+                template <class BR, class E, class MR>
+                auto merge(const BR& partial, bool first, E& result, MR&) const
+                {
+                    if (!first)
+                    {
+                        xt::noalias(result) = xt::maximum(partial, result);
+                    }
+                    else
+                    {
+                        xt::noalias(result) = partial;
+                    }
+                }
+            };
+
+            // Blockwise mean: per-block partials are plain sums; the division
+            // by the element count is performed once, in finalize().
+            template <class T_E, class T_I = void>
+            struct mean_functor
+            {
+                // Result type deduced from xt::mean<T_I> on an xarray<T_E>.
+                using value_type = typename std::decay_t<decltype(xt::mean<T_I>(std::declval<xarray<T_E>>()))>::value_type;
+
+                // Per-block partial: a sum over the requested axes.
+                template <class E, class A, class O>
+                auto compute(const E& block, const A& axes, const O& options) const
+                {
+                    return xt::sum<value_type>(block, axes, options);
+                }
+
+                // No state is carried across merges.
+                template <class E>
+                auto reduction_variable(const E&) const
+                {
+                    return empty_reduction_variable();
+                }
+
+                // First block seeds the result; later blocks are added.
+                template <class BR, class E>
+                auto merge(const BR& partial, bool first, E& result, empty_reduction_variable&) const
+                {
+                    if (!first)
+                    {
+                        xt::noalias(result) += partial;
+                    }
+                    else
+                    {
+                        xt::noalias(result) = partial;
+                    }
+                }
+
+                // Divide the accumulated sum by the number of reduced
+                // elements (product of the input extents along the axes).
+                template <class E, class R>
+                void finalize(const empty_reduction_variable&, E& results, const R& reducer) const
+                {
+                    std::decay_t<decltype(reducer.input_shape()[0])> count = 1;
+                    for (auto axis : reducer.axes())
+                    {
+                        count *= reducer.input_shape()[axis];
+                    }
+                    xt::noalias(results) /= static_cast<typename E::value_type>(count);
+                }
+            };
+
+            // Blockwise variance functor: per-block compute() returns a
+            // (variance, mean, weight) tuple and merge() pools these tuples
+            // using the standard parallel (pooled) variance update formula.
+            template <class T_E, class T_I = void>
+            struct variance_functor
+            {
+                // Result type deduced from xt::variance<T_I> on xarray<T_E>.
+                using value_type = typename std::decay_t<decltype(xt::variance<T_I>(std::declval<xarray<T_E>>())
+                )>::value_type;
+
+                template <class E, class A, class O>
+                auto compute(const E& input, const A& axes, const O& options) const
+                {
+                    // weight = number of elements reduced in this block
+                    // (product of the block's extents along the reduced axes).
+                    double weight = 1.0;
+                    for (auto a : axes)
+                    {
+                        weight *= static_cast<double>(input.shape()[a]);
+                    }
+
+
+                    return std::make_tuple(
+                        xt::variance<value_type>(input, axes, options),
+                        xt::mean<value_type>(input, axes, options),
+                        weight
+                    );
+                }
+
+                // Running state carried across merges: the pooled mean so far
+                // and the total weight so far (starting at 0.0).
+                template <class E>
+                auto reduction_variable(const E&) const
+                {
+                    return std::make_tuple(xarray<value_type>(), 0.0);
+                }
+
+                template <class BR, class E, class MR>
+                auto merge(const BR& block_result, bool first, E& variance_a, MR& mr) const
+                {
+                    auto& mean_a = std::get<0>(mr);
+                    auto& n_a = std::get<1>(mr);
+
+                    const auto& variance_b = std::get<0>(block_result);
+                    const auto& mean_b = std::get<1>(block_result);
+                    const auto& n_b = std::get<2>(block_result);
+                    if (first)
+                    {
+                        // Seed the running state with the first block
+                        // (n_a is 0.0 here, so += simply assigns n_b).
+                        xt::noalias(variance_a) = variance_b;
+                        xt::noalias(mean_a) = mean_b;
+                        n_a += n_b;
+                    }
+                    else
+                    {
+                        // Pooled update of mean and variance for two
+                        // sub-populations with weights n_a and n_b.
+                        auto new_mean = (n_a * mean_a + n_b * mean_b) / (n_a + n_b);
+                        auto new_variance = (n_a * variance_a + n_b * variance_b
+                                             + n_a * xt::pow(mean_a - new_mean, 2)
+                                             + n_b * xt::pow(mean_b - new_mean, 2))
+                                            / (n_a + n_b);
+                        xt::noalias(variance_a) = new_variance;
+                        xt::noalias(mean_a) = new_mean;
+                        n_a += n_b;
+                    }
+                }
+
+                // Nothing to do at the end: the result already holds the
+                // pooled variance after the last merge.
+                template <class MR, class E, class R>
+                void finalize(const MR&, E&, const R&) const
+                {
+                }
+            };
+
+            // Blockwise standard deviation: reuses the variance machinery and
+            // only overrides finalize() to take the square root of the
+            // pooled variance.
+            template <class T_E, class T_I = void>
+            struct stddev_functor : public variance_functor<T_E, T_I>
+            {
+                template <class MR, class E, class R>
+                void finalize(const MR&, E& results, const R&) const
+                {
+                    xt::noalias(results) = xt::sqrt(results);
+                }
+            };
+
+            // Blockwise L0 "norm": per block, counts the non-zero elements
+            // (sum of input != 0); partial counts are added together.
+            template <class T_E>
+            struct norm_l0_functor : public simple_functor_base
+            {
+                // Result type deduced from xt::norm_l0 on an xarray<T_E>.
+                using value_type = typename std::decay_t<decltype(xt::norm_l0(std::declval<xarray<T_E>>()))>::value_type;
+
+                // Per-block partial: number of non-zero entries per slot.
+                template <class E, class A, class O>
+                auto compute(const E& block, const A& axes, const O& options) const
+                {
+                    return xt::sum<value_type>(xt::not_equal(block, xt::zeros<T_E>(block.shape())), axes, options);
+                }
+
+                // First block seeds the result; later counts are added.
+                template <class BR, class E, class MR>
+                auto merge(const BR& partial, bool first, E& result, MR&) const
+                {
+                    if (!first)
+                    {
+                        xt::noalias(result) += partial;
+                    }
+                    else
+                    {
+                        xt::noalias(result) = partial;
+                    }
+                }
+            };
+
+            // Blockwise L1 norm: per block, the sum of absolute values;
+            // partial sums are added together.
+            template <class T_E>
+            struct norm_l1_functor : public simple_functor_base
+            {
+                // Result type deduced from xt::norm_l1 on an xarray<T_E>.
+                using value_type = typename std::decay_t<decltype(xt::norm_l1(std::declval<xarray<T_E>>()))>::value_type;
+
+                // Per-block partial: sum of |x| over the requested axes.
+                template <class E, class A, class O>
+                auto compute(const E& block, const A& axes, const O& options) const
+                {
+                    return xt::sum<value_type>(xt::abs(block), axes, options);
+                }
+
+                // First block seeds the result; later blocks are added.
+                template <class BR, class E, class MR>
+                auto merge(const BR& partial, bool first, E& result, MR&) const
+                {
+                    if (!first)
+                    {
+                        xt::noalias(result) += partial;
+                    }
+                    else
+                    {
+                        xt::noalias(result) = partial;
+                    }
+                }
+            };
+
+            // Blockwise L2 norm: per-block partials are sums of squares; the
+            // square root is deferred to finalize() so partials can simply be
+            // added across blocks.
+            template <class T_E>
+            struct norm_l2_functor
+            {
+                // Result type deduced from xt::norm_l2 on an xarray<T_E>.
+                using value_type = typename std::decay_t<decltype(xt::norm_l2(std::declval<xarray<T_E>>()))>::value_type;
+
+                // Per-block partial: sum of x^2 over the requested axes.
+                template <class E, class A, class O>
+                auto compute(const E& block, const A& axes, const O& options) const
+                {
+                    return xt::sum<value_type>(xt::square(block), axes, options);
+                }
+
+                // No state is carried across merges.
+                template <class E>
+                auto reduction_variable(const E&) const
+                {
+                    return empty_reduction_variable();
+                }
+
+                // First block seeds the result; later blocks are added.
+                template <class BR, class E>
+                auto merge(const BR& partial, bool first, E& result, empty_reduction_variable&) const
+                {
+                    if (!first)
+                    {
+                        xt::noalias(result) += partial;
+                    }
+                    else
+                    {
+                        xt::noalias(result) = partial;
+                    }
+                }
+
+                // Final step: square root of the accumulated sum of squares.
+                template <class E, class R>
+                void finalize(const empty_reduction_variable&, E& results, const R&) const
+                {
+                    xt::noalias(results) = xt::sqrt(results);
+                }
+            };
+
+            // Blockwise squared norm: per block, the sum of squares; partial
+            // sums are added together (no square root is taken).
+            template <class T_E>
+            struct norm_sq_functor : public simple_functor_base
+            {
+                // Result type deduced from xt::norm_sq on an xarray<T_E>.
+                using value_type = typename std::decay_t<decltype(xt::norm_sq(std::declval<xarray<T_E>>()))>::value_type;
+
+                // Per-block partial: sum of x^2 over the requested axes.
+                template <class E, class A, class O>
+                auto compute(const E& block, const A& axes, const O& options) const
+                {
+                    return xt::sum<value_type>(xt::square(block), axes, options);
+                }
+
+                // First block seeds the result; later blocks are added.
+                template <class BR, class E, class MR>
+                auto merge(const BR& partial, bool first, E& result, MR&) const
+                {
+                    if (!first)
+                    {
+                        xt::noalias(result) += partial;
+                    }
+                    else
+                    {
+                        xt::noalias(result) = partial;
+                    }
+                }
+            };
+
+            template <class T_E>
+            struct norm_linf_functor : public simple_functor_base
+            {
+                using value_type = typename std::decay_t<decltype(xt::norm_linf(std::declval<xarray<T_E>>()))>::value_type;
+
+                template <class E, class A, class O>
+                auto compute(const E& input, const A& axes, const O& options) const
+                {
+                    return xt::amax<value_type>(xt::abs(input), axes, options);
+                }
+
+                template <class BR, class E, class MR>
+                auto merge(const BR& block_result, bool first, E& result, MR&) const
+                {
+                    if (first)
+                    {
+                        xt::noalias(result) = block_result;
+                    }
+                    else
+                    {
+                        xt::noalias(result) = xt::maximum(block_result, result);
+                    }
+                }
+            };
+
+            // Blockwise "lp to the p" norm (sum of p-th powers): blocks are
+            // merged by addition and, unlike norm_lp_functor, no p-th root is
+            // taken at the end (finalize is a no-op).
+            template <class T_E>
+            class norm_lp_to_p_functor
+            {
+            public:
+
+                using value_type = typename std::decay_t<
+                    decltype(xt::norm_lp_to_p(std::declval<xarray<T_E>>(), 1.0))>::value_type;
+
+                norm_lp_to_p_functor(double p)
+                    : m_p(p)
+                {
+                }
+
+                template <class E, class A, class O>
+                auto compute(const E& input, const A& axes, const O& options) const
+                {
+                    return xt::sum<value_type>(xt::pow(input, m_p), axes, options);
+                }
+
+                // No cross-block state is required; merging is plain accumulation.
+                template <class E>
+                auto reduction_variable(const E&) const
+                {
+                    return empty_reduction_variable();
+                }
+
+                template <class BR, class E>
+                auto merge(const BR& block_result, bool first, E& result, empty_reduction_variable&) const
+                {
+                    if (first)
+                    {
+                        xt::noalias(result) = block_result;
+                    }
+                    else
+                    {
+                        xt::noalias(result) += block_result;
+                    }
+                }
+
+                // Intentionally empty: the result is already the sum of p-th powers.
+                template <class E, class R>
+                void finalize(const empty_reduction_variable&, E&, const R&) const
+                {
+                }
+
+            private:
+
+                // Exponent p of the norm.
+                double m_p;
+            };
+
+            // Blockwise Lp norm: each block contributes a sum of p-th powers,
+            // blocks are merged by addition, and the p-th root is applied once
+            // in finalize().
+            template <class T_E>
+            class norm_lp_functor
+            {
+            public:
+
+                norm_lp_functor(double p)
+                    : m_p(p)
+                {
+                }
+
+                using value_type = typename std::decay_t<decltype(xt::norm_lp(std::declval<xarray<T_E>>(), 1.0)
+                )>::value_type;
+
+                template <class E, class A, class O>
+                auto compute(const E& input, const A& axes, const O& options) const
+                {
+                    return xt::sum<value_type>(xt::pow(input, m_p), axes, options);
+                }
+
+                // No cross-block state is required; merging is plain accumulation.
+                template <class E>
+                auto reduction_variable(const E&) const
+                {
+                    return empty_reduction_variable();
+                }
+
+                template <class BR, class E>
+                auto merge(const BR& block_result, bool first, E& result, empty_reduction_variable&) const
+                {
+                    if (first)
+                    {
+                        xt::noalias(result) = block_result;
+                    }
+                    else
+                    {
+                        xt::noalias(result) += block_result;
+                    }
+                }
+
+                // Runs once after all blocks are merged: take the p-th root in
+                // place. xt::noalias skips the redundant resize/broadcast check,
+                // matching norm_l2_functor::finalize and the merge steps above;
+                // pow is element-wise, so the self-assignment is alias-safe.
+                template <class E, class R>
+                void finalize(const empty_reduction_variable&, E& results, const R&) const
+                {
+                    xt::noalias(results) = xt::pow(results, 1.0 / m_p);
+                }
+
+            private:
+
+                // Exponent p of the norm.
+                double m_p;
+            };
+
+
+        }
+    }
+}
+
+#endif

+ 482 - 0
3rd/numpy/include/xtensor/xbroadcast.hpp

@@ -0,0 +1,482 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_BROADCAST_HPP
+#define XTENSOR_BROADCAST_HPP
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <iterator>
+#include <numeric>
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xsequence.hpp>
+
+#include "xaccessible.hpp"
+#include "xexpression.hpp"
+#include "xiterable.hpp"
+#include "xscalar.hpp"
+#include "xstrides.hpp"
+#include "xtensor_config.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+
+    /*************
+     * broadcast *
+     *************/
+
+    // Forward declarations of the broadcast() factory functions defined below.
+    template <class E, class S>
+    auto broadcast(E&& e, const S& s);
+
+    template <class E, class I, std::size_t L>
+    auto broadcast(E&& e, const I (&s)[L]);
+
+    /*************************
+     * xbroadcast extensions *
+     *************************/
+
+    namespace extension
+    {
+        // Selects the extension base class of xbroadcast from the expression tag.
+        template <class Tag, class CT, class X>
+        struct xbroadcast_base_impl;
+
+        // Plain xtensor expressions need no additional interface.
+        template <class CT, class X>
+        struct xbroadcast_base_impl<xtensor_expression_tag, CT, X>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class CT, class X>
+        struct xbroadcast_base : xbroadcast_base_impl<xexpression_tag_t<CT>, CT, X>
+        {
+        };
+
+        template <class CT, class X>
+        using xbroadcast_base_t = typename xbroadcast_base<CT, X>::type;
+    }
+
+    /**************
+     * xbroadcast *
+     **************/
+
+    template <class CT, class X>
+    class xbroadcast;
+
+    // Iteration-related inner types: the broadcast shape is the promotion of
+    // the wrapped expression's shape with X; steppers are always const.
+    template <class CT, class X>
+    struct xiterable_inner_types<xbroadcast<CT, X>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using inner_shape_type = promote_shape_t<typename xexpression_type::shape_type, X>;
+        using const_stepper = typename xexpression_type::const_stepper;
+        using stepper = const_stepper;
+    };
+
+    // Container-related inner types: xbroadcast is read-only, so `reference`
+    // is the wrapped expression's const_reference.
+    template <class CT, class X>
+    struct xcontainer_inner_types<xbroadcast<CT, X>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using reference = typename xexpression_type::const_reference;
+        using const_reference = typename xexpression_type::const_reference;
+        using size_type = typename xexpression_type::size_type;
+    };
+
+    /*****************************
+     * linear_begin / linear_end *
+     *****************************/
+
+    // Linear iteration over an xbroadcast delegates to the wrapped expression.
+    template <class CT, class X>
+    XTENSOR_CONSTEXPR_RETURN auto linear_begin(xbroadcast<CT, X>& c) noexcept
+    {
+        return linear_begin(c.expression());
+    }
+
+    template <class CT, class X>
+    XTENSOR_CONSTEXPR_RETURN auto linear_end(xbroadcast<CT, X>& c) noexcept
+    {
+        return linear_end(c.expression());
+    }
+
+    template <class CT, class X>
+    XTENSOR_CONSTEXPR_RETURN auto linear_begin(const xbroadcast<CT, X>& c) noexcept
+    {
+        return linear_begin(c.expression());
+    }
+
+    template <class CT, class X>
+    XTENSOR_CONSTEXPR_RETURN auto linear_end(const xbroadcast<CT, X>& c) noexcept
+    {
+        return linear_end(c.expression());
+    }
+
+    /*************************************
+     * overlapping_memory_checker_traits *
+     *************************************/
+
+    // An xbroadcast owns no memory of its own: overlap with a destination
+    // range is decided by recursing into the wrapped expression.
+    template <class E>
+    struct overlapping_memory_checker_traits<
+        E,
+        std::enable_if_t<!has_memory_address<E>::value && is_specialization_of<xbroadcast, E>::value>>
+    {
+        static bool check_overlap(const E& expr, const memory_range& dst_range)
+        {
+            // An empty expression cannot overlap anything.
+            if (expr.size() == 0)
+            {
+                return false;
+            }
+            else
+            {
+                using ChildE = std::decay_t<decltype(expr.expression())>;
+                return overlapping_memory_checker_traits<ChildE>::check_overlap(expr.expression(), dst_range);
+            }
+        }
+    };
+
+    /**
+     * @class xbroadcast
+     * @brief Broadcasted xexpression to a specified shape.
+     *
+     * The xbroadcast class implements the broadcasting of an \ref xexpression
+     * to a specified shape. xbroadcast is not meant to be used directly, but
+     * only with the \ref broadcast helper functions.
+     *
+     * @tparam CT the closure type of the \ref xexpression to broadcast
+     * @tparam X the type of the specified shape.
+     *
+     * @sa broadcast
+     */
+    template <class CT, class X>
+    class xbroadcast : public xsharable_expression<xbroadcast<CT, X>>,
+                       public xconst_iterable<xbroadcast<CT, X>>,
+                       public xconst_accessible<xbroadcast<CT, X>>,
+                       public extension::xbroadcast_base_t<CT, X>
+    {
+    public:
+
+        using self_type = xbroadcast<CT, X>;
+        using xexpression_type = std::decay_t<CT>;
+        using accessible_base = xconst_accessible<self_type>;
+        using extension_base = extension::xbroadcast_base_t<CT, X>;
+        using expression_tag = typename extension_base::expression_tag;
+
+        using inner_types = xcontainer_inner_types<self_type>;
+        using value_type = typename xexpression_type::value_type;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+        using pointer = typename xexpression_type::const_pointer;
+        using const_pointer = typename xexpression_type::const_pointer;
+        using size_type = typename inner_types::size_type;
+        using difference_type = typename xexpression_type::difference_type;
+
+        using iterable_base = xconst_iterable<self_type>;
+        using inner_shape_type = typename iterable_base::inner_shape_type;
+        using shape_type = inner_shape_type;
+
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+
+        using bool_load_type = typename xexpression_type::bool_load_type;
+
+        // A broadcast view has no fixed layout and is never contiguous.
+        static constexpr layout_type static_layout = layout_type::dynamic;
+        static constexpr bool contiguous_layout = false;
+
+        template <class CTA, class S>
+        xbroadcast(CTA&& e, const S& s);
+
+        template <class CTA>
+        xbroadcast(CTA&& e, shape_type&& s);
+
+        using accessible_base::size;
+        const inner_shape_type& shape() const noexcept;
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+        using accessible_base::shape;
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        const xexpression_type& expression() const noexcept;
+
+        template <class S>
+        bool broadcast_shape(S& shape, bool reuse_cache = false) const;
+
+        template <class S>
+        bool has_linear_assign(const S& strides) const noexcept;
+
+        template <class S>
+        const_stepper stepper_begin(const S& shape) const noexcept;
+        template <class S>
+        const_stepper stepper_end(const S& shape, layout_type l) const noexcept;
+
+        // Only enabled when CT is an xscalar; see the implementation below.
+        template <class E, class XCT = CT, class = std::enable_if_t<xt::is_xscalar<XCT>::value>>
+        void assign_to(xexpression<E>& e) const;
+
+        template <class E>
+        using rebind_t = xbroadcast<E, X>;
+
+        template <class E>
+        rebind_t<E> build_broadcast(E&& e) const;
+
+    private:
+
+        // Closure on the broadcasted expression (reference or value, per CT).
+        CT m_e;
+        // The shape the expression is broadcast to.
+        inner_shape_type m_shape;
+    };
+
+    /****************************
+     * broadcast implementation *
+     ****************************/
+
+    /**
+     * @brief Returns an \ref xexpression broadcasting the given expression to
+     * a specified shape.
+     *
+     * @param e the \ref xexpression to broadcast
+     * @param s the specified shape to broadcast.
+     *
+     * The returned expression either holds a const reference to \p e or a copy
+     * depending on whether \p e is an lvalue or an rvalue.
+     */
+    template <class E, class S>
+    inline auto broadcast(E&& e, const S& s)
+    {
+        using shape_type = filter_fixed_shape_t<std::decay_t<S>>;
+        using broadcast_type = xbroadcast<const_xclosure_t<E>, shape_type>;
+        return broadcast_type(std::forward<E>(e), xtl::forward_sequence<shape_type, decltype(s)>(s));
+    }
+
+    // Overload for a built-in array shape (e.g. a braced initializer list);
+    // the shape becomes a std::array of the deduced length L.
+    template <class E, class I, std::size_t L>
+    inline auto broadcast(E&& e, const I (&s)[L])
+    {
+        using broadcast_type = xbroadcast<const_xclosure_t<E>, std::array<std::size_t, L>>;
+        using shape_type = typename broadcast_type::shape_type;
+        return broadcast_type(std::forward<E>(e), xtl::forward_sequence<shape_type, decltype(s)>(s));
+    }
+
+    /*****************************
+     * xbroadcast implementation *
+     *****************************/
+
+    /**
+     * @name Constructor
+     */
+    //@{
+    /**
+     * Constructs an xbroadcast expression broadcasting the specified
+     * \ref xexpression to the given shape
+     *
+     * @param e the expression to broadcast
+     * @param s the shape to apply
+     */
+    template <class CT, class X>
+    template <class CTA, class S>
+    inline xbroadcast<CT, X>::xbroadcast(CTA&& e, const S& s)
+        : m_e(std::forward<CTA>(e))
+    {
+        // Broadcasting can only add dimensions, never drop them.
+        if (s.size() < m_e.dimension())
+        {
+            XTENSOR_THROW(xt::broadcast_error, "Broadcast shape has fewer elements than original expression.");
+        }
+        xt::resize_container(m_shape, s.size());
+        std::copy(s.begin(), s.end(), m_shape.begin());
+        // Validates that m_e's shape is broadcastable to m_shape.
+        xt::broadcast_shape(m_e.shape(), m_shape);
+    }
+
+    /**
+     * Constructs an xbroadcast expression broadcasting the specified
+     * \ref xexpression to the given shape
+     *
+     * @param e the expression to broadcast
+     * @param s the shape to apply
+     */
+    template <class CT, class X>
+    template <class CTA>
+    inline xbroadcast<CT, X>::xbroadcast(CTA&& e, shape_type&& s)
+        : m_e(std::forward<CTA>(e))
+        , m_shape(std::move(s))
+    {
+        // Validates that m_e's shape is broadcastable to m_shape.
+        xt::broadcast_shape(m_e.shape(), m_shape);
+    }
+
+    //@}
+
+    /**
+     * @name Size and shape
+     */
+    //@{
+    /**
+     * Returns the shape of the expression.
+     */
+    template <class CT, class X>
+    inline auto xbroadcast<CT, X>::shape() const noexcept -> const inner_shape_type&
+    {
+        return m_shape;
+    }
+
+    /**
+     * Returns the layout_type of the expression.
+     */
+    template <class CT, class X>
+    inline layout_type xbroadcast<CT, X>::layout() const noexcept
+    {
+        return m_e.layout();
+    }
+
+    /**
+     * Returns false: a broadcast view is never contiguous.
+     */
+    template <class CT, class X>
+    inline bool xbroadcast<CT, X>::is_contiguous() const noexcept
+    {
+        return false;
+    }
+
+    //@}
+
+    /**
+     * @name Data
+     */
+    //@{
+    /**
+     * Returns a constant reference to the element at the specified position in the expression.
+     * @param args a list of indices specifying the position in the function. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the expression.
+     */
+    template <class CT, class X>
+    template <class... Args>
+    inline auto xbroadcast<CT, X>::operator()(Args... args) const -> const_reference
+    {
+        // The wrapped expression's operator() performs the index adjustment.
+        return m_e(args...);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the expression.
+     * @param args a list of indices specifying the position in the expression. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the expression, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class CT, class X>
+    template <class... Args>
+    inline auto xbroadcast<CT, X>::unchecked(Args... args) const -> const_reference
+    {
+        return this->operator()(args...);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the expression.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater
+     * than the number of dimensions of the function.
+     */
+    template <class CT, class X>
+    template <class It>
+    inline auto xbroadcast<CT, X>::element(It, It last) const -> const_reference
+    {
+        // Only the trailing dimension() indices are relevant to the wrapped
+        // expression; leading (broadcast) indices are ignored.
+        return m_e.element(last - this->dimension(), last);
+    }
+
+    /**
+     * Returns a constant reference to the underlying expression of the broadcast expression.
+     */
+    template <class CT, class X>
+    inline auto xbroadcast<CT, X>::expression() const noexcept -> const xexpression_type&
+    {
+        return m_e;
+    }
+
+    //@}
+
+    /**
+     * @name Broadcasting
+     */
+    //@{
+    /**
+     * Broadcast the shape of the function to the specified parameter.
+     * @param shape the result shape
+     * @param reuse_cache parameter for internal optimization
+     * @return a boolean indicating whether the broadcasting is trivial
+     */
+    template <class CT, class X>
+    template <class S>
+    inline bool xbroadcast<CT, X>::broadcast_shape(S& shape, bool) const
+    {
+        return xt::broadcast_shape(m_shape, shape);
+    }
+
+    /**
+     * Checks whether the xbroadcast can be linearly assigned to an expression
+     * with the specified strides.
+     * @return a boolean indicating whether a linear assign is possible
+     */
+    template <class CT, class X>
+    template <class S>
+    inline bool xbroadcast<CT, X>::has_linear_assign(const S& strides) const noexcept
+    {
+        // Linear assignment is only possible when the broadcast is a no-op,
+        // i.e. the target shape equals the wrapped expression's shape.
+        return this->dimension() == m_e.dimension()
+               && std::equal(m_shape.cbegin(), m_shape.cend(), m_e.shape().cbegin())
+               && m_e.has_linear_assign(strides);
+    }
+
+    //@}
+
+    // Steppers are created directly on the wrapped expression.
+    template <class CT, class X>
+    template <class S>
+    inline auto xbroadcast<CT, X>::stepper_begin(const S& shape) const noexcept -> const_stepper
+    {
+        // Could check broadcastable(shape, m_shape) here.
+        return m_e.stepper_begin(shape);
+    }
+
+    template <class CT, class X>
+    template <class S>
+    inline auto xbroadcast<CT, X>::stepper_end(const S& shape, layout_type l) const noexcept -> const_stepper
+    {
+        // Could check broadcastable(shape, m_shape) here.
+        return m_e.stepper_end(shape, l);
+    }
+
+    // Only enabled when CT is an xscalar: broadcasting a scalar and assigning
+    // it amounts to resizing the target and filling it with the scalar value.
+    template <class CT, class X>
+    template <class E, class XCT, class>
+    inline void xbroadcast<CT, X>::assign_to(xexpression<E>& e) const
+    {
+        auto& ed = e.derived_cast();
+        ed.resize(m_shape);
+        std::fill(ed.begin(), ed.end(), m_e());
+    }
+
+    // Rebuilds an equivalent xbroadcast around a different expression closure,
+    // keeping the same target shape.
+    template <class CT, class X>
+    template <class E>
+    inline auto xbroadcast<CT, X>::build_broadcast(E&& e) const -> rebind_t<E>
+    {
+        return rebind_t<E>(std::forward<E>(e), inner_shape_type(m_shape));
+    }
+}
+
+#endif

+ 1282 - 0
3rd/numpy/include/xtensor/xbuffer_adaptor.hpp

@@ -0,0 +1,1282 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_BUFFER_ADAPTOR_HPP
+#define XTENSOR_BUFFER_ADAPTOR_HPP
+
+#include <algorithm>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <stdexcept>
+
+#include <xtl/xclosure.hpp>
+
+#include "xstorage.hpp"
+#include "xtensor_config.hpp"
+
+namespace xt
+{
+
+    // Tag type: the adaptor only references the buffer and never frees it.
+    struct no_ownership
+    {
+    };
+
+    using smart_ownership = no_ownership;
+
+    // Tag type: the adaptor takes ownership of the buffer and releases it.
+    struct acquire_ownership
+    {
+    };
+
+    template <class CP, class O = no_ownership, class A = std::allocator<std::remove_pointer_t<std::remove_reference_t<CP>>>>
+    class xbuffer_adaptor;
+
+    /********************
+     * buffer_storage_t *
+     ********************/
+
+    namespace detail
+    {
+        // Storage policy wrapping an externally managed pointer and a size;
+        // selected for the no_ownership case (see get_buffer_storage below).
+        // Constness of CP's pointee propagates to reference/pointer types.
+        template <class CP, class A>
+        class xbuffer_storage
+        {
+        public:
+
+            using self_type = xbuffer_storage<CP, A>;
+            using allocator_type = A;
+            using destructor_type = allocator_type;
+            using allocator_traits = std::allocator_traits<allocator_type>;
+            using value_type = typename allocator_traits::value_type;
+            using reference = std::conditional_t<
+                std::is_const<std::remove_pointer_t<std::remove_reference_t<CP>>>::value,
+                const value_type&,
+                value_type&>;
+            using const_reference = const value_type&;
+            using pointer = std::conditional_t<
+                std::is_const<std::remove_pointer_t<std::remove_reference_t<CP>>>::value,
+                typename allocator_traits::const_pointer,
+                typename allocator_traits::pointer>;
+            using const_pointer = typename allocator_traits::const_pointer;
+            using size_type = typename allocator_traits::size_type;
+            using difference_type = typename allocator_traits::difference_type;
+
+            xbuffer_storage();
+
+            template <class P>
+            xbuffer_storage(P&& data, size_type size, const allocator_type& alloc = allocator_type());
+
+            size_type size() const noexcept;
+            void resize(size_type size);
+
+            pointer data() noexcept;
+            const_pointer data() const noexcept;
+
+            void swap(self_type& rhs) noexcept;
+
+            template <class P>
+            void reset_data(P&& data, size_type size) noexcept;
+
+        private:
+
+            pointer p_data;
+            size_type m_size;
+        };
+
+        // Storage policy for buffers whose lifetime is managed by a smart
+        // pointer or a custom destructor callable D (lambda, shared_ptr,
+        // unique_ptr); see get_buffer_storage below for the selection rules.
+        template <class CP, class D>
+        class xbuffer_smart_pointer
+        {
+        public:
+
+            // NOTE(review): this aliases xbuffer_storage, not
+            // xbuffer_smart_pointer — looks like a copy-paste slip, since
+            // swap(self_type&) below would then take the wrong type. Confirm
+            // against upstream xtensor before changing.
+            using self_type = xbuffer_storage<CP, D>;
+            using destructor_type = D;
+            using value_type = std::remove_const_t<std::remove_pointer_t<std::remove_reference_t<CP>>>;
+            using allocator_type = std::allocator<value_type>;
+            using allocator_traits = std::allocator_traits<allocator_type>;
+            using reference = std::conditional_t<
+                std::is_const<std::remove_pointer_t<std::remove_reference_t<CP>>>::value,
+                const value_type&,
+                value_type&>;
+            using const_reference = const value_type&;
+            using pointer = std::conditional_t<
+                std::is_const<std::remove_pointer_t<std::remove_reference_t<CP>>>::value,
+                typename allocator_traits::const_pointer,
+                typename allocator_traits::pointer>;
+            using const_pointer = typename allocator_traits::const_pointer;
+            using size_type = typename allocator_traits::size_type;
+            using difference_type = typename allocator_traits::difference_type;
+
+            xbuffer_smart_pointer();
+
+            template <class P, class DT>
+            xbuffer_smart_pointer(P&& data_ptr, size_type size, DT&& destruct);
+
+            size_type size() const noexcept;
+            void resize(size_type size);
+
+            pointer data() noexcept;
+            const_pointer data() const noexcept;
+
+            void swap(self_type& rhs) noexcept;
+
+            template <class P, class DT>
+            void reset_data(P&& data, size_type size, DT&& destruct) noexcept;
+
+        private:
+
+            pointer p_data;
+            size_type m_size;
+            destructor_type m_destruct;
+        };
+
+        // Storage policy that owns the buffer and deallocates it with the
+        // provided allocator; selected for the acquire_ownership case.
+        // Move-only construction: copy construction is deleted, while copy
+        // assignment is declared (defined out of this view).
+        template <class CP, class A>
+        class xbuffer_owner_storage
+        {
+        public:
+
+            using self_type = xbuffer_owner_storage<CP, A>;
+            using allocator_type = A;
+            using destructor_type = allocator_type;
+            using allocator_traits = std::allocator_traits<allocator_type>;
+            using value_type = typename allocator_traits::value_type;
+            using reference = std::conditional_t<
+                std::is_const<std::remove_pointer_t<std::remove_reference_t<CP>>>::value,
+                const value_type&,
+                value_type&>;
+            using const_reference = const value_type&;
+            using pointer = std::conditional_t<
+                std::is_const<std::remove_pointer_t<std::remove_reference_t<CP>>>::value,
+                typename allocator_traits::const_pointer,
+                typename allocator_traits::pointer>;
+            using const_pointer = typename allocator_traits::const_pointer;
+            using size_type = typename allocator_traits::size_type;
+            using difference_type = typename allocator_traits::difference_type;
+
+            xbuffer_owner_storage() = default;
+
+            template <class P>
+            xbuffer_owner_storage(P&& data, size_type size, const allocator_type& alloc = allocator_type());
+
+            ~xbuffer_owner_storage();
+
+            xbuffer_owner_storage(const self_type&) = delete;
+            self_type& operator=(const self_type&);
+
+            xbuffer_owner_storage(self_type&&);
+            self_type& operator=(self_type&&);
+
+            size_type size() const noexcept;
+            void resize(size_type size);
+
+            pointer data() noexcept;
+            const_pointer data() const noexcept;
+
+            allocator_type get_allocator() const noexcept;
+
+            void swap(self_type& rhs) noexcept;
+
+            template <class P>
+            void reset_data(P&& data, size_type size, const allocator_type& alloc = allocator_type()) noexcept;
+
+        private:
+
+            xtl::xclosure_wrapper<CP> m_data;
+            size_type m_size;
+            // Presumably set on move to suppress deallocation in the
+            // destructor — confirm in the out-of-view definitions.
+            bool m_moved_from;
+            allocator_type m_allocator;
+        };
+
+        // Workaround for MSVC2015: using void_t results in some
+        // template instantiation caching that leads to wrong
+        // type deduction later in xfunction.
+        template <class T>
+        struct msvc2015_void
+        {
+            using type = void;
+        };
+
+        template <class T>
+        using msvc2015_void_t = typename msvc2015_void<T>::type;
+
+        template <class E, class = void>
+        struct is_lambda_type : std::false_type
+        {
+        };
+
+        // check if operator() is available
+        template <class E>
+        struct is_lambda_type<E, msvc2015_void_t<decltype(&E::operator())>> : std::true_type
+        {
+        };
+
+        // Identity metafunction used with xtl::mpl::eval_if_t below.
+        template <class T>
+        struct self_type
+        {
+            using type = T;
+        };
+
+        // Maps (closure pointer CP, allocator/destructor A, ownership O) to
+        // the matching storage policy:
+        //   - callable A (lambda)        -> xbuffer_smart_pointer
+        //   - otherwise                  -> xbuffer_storage (non-owning)
+        template <class CP, class A, class O>
+        struct get_buffer_storage
+        {
+            using type = xtl::mpl::eval_if_t<
+                is_lambda_type<A>,
+                self_type<xbuffer_smart_pointer<CP, A>>,
+                self_type<xbuffer_storage<CP, A>>>;
+        };
+
+        //   - acquire_ownership         -> xbuffer_owner_storage
+        template <class CP, class A>
+        struct get_buffer_storage<CP, A, acquire_ownership>
+        {
+            using type = xbuffer_owner_storage<CP, A>;
+        };
+
+        //   - shared_ptr / unique_ptr   -> xbuffer_smart_pointer
+        template <class CP, class T>
+        struct get_buffer_storage<CP, std::shared_ptr<T>, no_ownership>
+        {
+            using type = xbuffer_smart_pointer<CP, std::shared_ptr<T>>;
+        };
+
+        template <class CP, class T>
+        struct get_buffer_storage<CP, std::unique_ptr<T>, no_ownership>
+        {
+            using type = xbuffer_smart_pointer<CP, std::unique_ptr<T>>;
+        };
+
+        template <class CP, class A, class O>
+        using buffer_storage_t = typename get_buffer_storage<CP, A, O>::type;
+    }
+
+    /************************
+     * xbuffer_adaptor_base *
+     ************************/
+
+    template <class D>
+    struct buffer_inner_types;
+
+    // CRTP base providing the container-like interface (element access,
+    // iterators) shared by the buffer adaptors; D is the derived adaptor.
+    template <class D>
+    class xbuffer_adaptor_base
+    {
+    public:
+
+        using self_type = xbuffer_adaptor_base<D>;
+        using derived_type = D;
+        using inner_types = buffer_inner_types<D>;
+        using value_type = typename inner_types::value_type;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+        using pointer = typename inner_types::pointer;
+        using const_pointer = typename inner_types::const_pointer;
+        using size_type = typename inner_types::size_type;
+        using difference_type = typename inner_types::difference_type;
+        using iterator = typename inner_types::iterator;
+        using const_iterator = typename inner_types::const_iterator;
+        using reverse_iterator = typename inner_types::reverse_iterator;
+        using const_reverse_iterator = typename inner_types::const_reverse_iterator;
+        using index_type = typename inner_types::index_type;
+
+        bool empty() const noexcept;
+
+        reference operator[](size_type i);
+        const_reference operator[](size_type i) const;
+
+        reference front();
+        const_reference front() const;
+
+        reference back();
+        const_reference back() const;
+
+        iterator begin() noexcept;
+        iterator end() noexcept;
+
+        const_iterator begin() const noexcept;
+        const_iterator end() const noexcept;
+        const_iterator cbegin() const noexcept;
+        const_iterator cend() const noexcept;
+
+        reverse_iterator rbegin() noexcept;
+        reverse_iterator rend() noexcept;
+
+        const_reverse_iterator rbegin() const noexcept;
+        const_reverse_iterator rend() const noexcept;
+        const_reverse_iterator crbegin() const noexcept;
+        const_reverse_iterator crend() const noexcept;
+
+        derived_type& derived_cast() noexcept;
+        const derived_type& derived_cast() const noexcept;
+
+    protected:
+
+        // Protected special members: this class is only usable as a base.
+        xbuffer_adaptor_base() = default;
+        ~xbuffer_adaptor_base() = default;
+
+        xbuffer_adaptor_base(const self_type&) = default;
+        self_type& operator=(const self_type&) = default;
+
+        xbuffer_adaptor_base(self_type&&) = default;
+        self_type& operator=(self_type&&) = default;
+    };
+
+    template <class D>
+    bool operator==(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs);
+
+    template <class D>
+    bool operator!=(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs);
+
+    template <class D>
+    bool operator<(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs);
+
+    template <class D>
+    bool operator<=(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs);
+
+    template <class D>
+    bool operator>(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs);
+
+    template <class D>
+    bool operator>=(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs);
+
+    /*******************
+     * xbuffer_adaptor *
+     *******************/
+
    // Inner types of xbuffer_adaptor are forwarded from the storage policy
    // chosen by detail::buffer_storage_t; iterators are plain (const)
    // pointers, i.e. the adapted buffer is contiguous.
    template <class CP, class O, class A>
    struct buffer_inner_types<xbuffer_adaptor<CP, O, A>>
    {
        using base_type = detail::buffer_storage_t<CP, A, O>;
        using value_type = typename base_type::value_type;
        using reference = typename base_type::reference;
        using const_reference = typename base_type::const_reference;
        using pointer = typename base_type::pointer;
        using const_pointer = typename base_type::const_pointer;
        using size_type = typename base_type::size_type;
        using difference_type = typename base_type::difference_type;
        using iterator = pointer;
        using const_iterator = const_pointer;
        using reverse_iterator = std::reverse_iterator<iterator>;
        using const_reverse_iterator = std::reverse_iterator<const_iterator>;
        using index_type = size_type;
    };

    /**
     * Adaptor exposing an externally provided buffer through an
     * STL-container-like interface.  The storage policy (selected from the
     * CP/O/A parameters by detail::buffer_storage_t) is inherited privately
     * and supplies data(), size(), resize() and swap(); the container
     * interface comes from xbuffer_adaptor_base.
     */
    template <class CP, class O, class A>
    class xbuffer_adaptor : private detail::buffer_storage_t<CP, A, O>,
                            public xbuffer_adaptor_base<xbuffer_adaptor<CP, O, A>>
    {
    public:

        using self_type = xbuffer_adaptor<CP, O, A>;
        using base_type = detail::buffer_storage_t<CP, A, O>;
        using buffer_base_type = xbuffer_adaptor_base<self_type>;
        using allocator_type = typename base_type::allocator_type;
        using destructor_type = typename base_type::destructor_type;
        using value_type = typename buffer_base_type::value_type;
        using reference = typename buffer_base_type::reference;
        using const_reference = typename buffer_base_type::const_reference;
        using pointer = typename buffer_base_type::pointer;
        using const_pointer = typename buffer_base_type::const_pointer;
        using size_type = typename buffer_base_type::size_type;
        using difference_type = typename buffer_base_type::difference_type;
        using iterator = typename buffer_base_type::iterator;
        using const_iterator = typename buffer_base_type::const_iterator;
        using reverse_iterator = typename buffer_base_type::reverse_iterator;
        using const_reverse_iterator = typename buffer_base_type::const_reverse_iterator;
        // Container used when an assignment needs a temporary copy of the data.
        using temporary_type = uvector<value_type, allocator_type>;

        xbuffer_adaptor() = default;

        // Inherit the storage policy constructors (pointer + size [+ allocator
        // or destructor], depending on the policy).
        using base_type::base_type;

        ~xbuffer_adaptor() = default;

        xbuffer_adaptor(const self_type&) = default;
        self_type& operator=(const self_type&) = default;

        xbuffer_adaptor(self_type&&) = default;
        xbuffer_adaptor& operator=(self_type&&) = default;

        // Resizes through the storage policy, then copies the temporary's
        // elements into the adapted buffer.
        self_type& operator=(temporary_type&&);

        // Expose the storage policy's buffer management functions.
        using base_type::data;
        using base_type::reset_data;
        using base_type::resize;
        using base_type::size;
        using base_type::swap;
    };

    template <class CP, class O, class A>
    void swap(xbuffer_adaptor<CP, O, A>& lhs, xbuffer_adaptor<CP, O, A>& rhs) noexcept;
+
+    /*********************
+     * xiterator_adaptor *
+     *********************/
+
    template <class I, class CI>
    class xiterator_adaptor;

    // Inner types of the iterator adaptor are derived from the iterator
    // traits of the (mutable, const) iterator pair; index_type is signed
    // since iterators are advanced with difference_type.
    template <class I, class CI>
    struct buffer_inner_types<xiterator_adaptor<I, CI>>
    {
        using traits = std::iterator_traits<I>;
        using const_traits = std::iterator_traits<CI>;

        using value_type = std::common_type_t<typename traits::value_type, typename const_traits::value_type>;
        using reference = typename traits::reference;
        using const_reference = typename const_traits::reference;
        using pointer = typename traits::pointer;
        using const_pointer = typename const_traits::pointer;
        using difference_type = std::common_type_t<typename traits::difference_type, typename const_traits::difference_type>;
        using size_type = std::make_unsigned_t<difference_type>;

        using iterator = I;
        using const_iterator = CI;
        using reverse_iterator = std::reverse_iterator<iterator>;
        using const_reverse_iterator = std::reverse_iterator<const_iterator>;
        using index_type = difference_type;
    };

    /**
     * Adaptor exposing an iterator range (begin iterator, const begin
     * iterator, size) through the container interface of
     * xbuffer_adaptor_base.  It does not own the underlying sequence.
     */
    template <class I, class CI>
    class xiterator_adaptor : public xbuffer_adaptor_base<xiterator_adaptor<I, CI>>
    {
    public:

        using self_type = xiterator_adaptor<I, CI>;
        using base_type = xbuffer_adaptor_base<self_type>;
        using value_type = typename base_type::value_type;
        using allocator_type = std::allocator<value_type>;
        using size_type = typename base_type::size_type;
        using iterator = typename base_type::iterator;
        using const_iterator = typename base_type::const_iterator;
        using temporary_type = uvector<value_type, allocator_type>;

        xiterator_adaptor() = default;
        xiterator_adaptor(I it, CI cit, size_type size);

        ~xiterator_adaptor() = default;

        xiterator_adaptor(const self_type&) = default;
        xiterator_adaptor& operator=(const self_type&) = default;

        xiterator_adaptor(self_type&&) = default;
        xiterator_adaptor& operator=(self_type&&) = default;

        // Copies the temporary's content into the adapted range
        // (the range cannot be resized, so sizes must match).
        xiterator_adaptor& operator=(const temporary_type& rhs);
        xiterator_adaptor& operator=(temporary_type&& rhs);

        size_type size() const noexcept;
        void resize(size_type size);

        // data() returns the stored iterators, matching the
        // iterator/const_iterator typedefs of the base interface.
        iterator data() noexcept;
        const_iterator data() const noexcept;

        void swap(self_type& rhs) noexcept;

    private:

        I m_it;
        CI m_cit;
        size_type m_size;
    };

    template <class I, class CI>
    void swap(xiterator_adaptor<I, CI>& lhs, xiterator_adaptor<I, CI>& rhs) noexcept;

    // The adaptor is contiguous exactly when its iterator type is.
    template <class I, class CI>
    struct is_contiguous_container<xiterator_adaptor<I, CI>> : is_contiguous_container<I>
    {
    };
+
+    /***************************
+     * xiterator_owner_adaptor *
+     ***************************/
+
    template <class C, class IG>
    class xiterator_owner_adaptor;

    // Inner types of the owning iterator adaptor, derived from the iterator
    // types provided by the iterator-getter policy IG.
    template <class C, class IG>
    struct buffer_inner_types<xiterator_owner_adaptor<C, IG>>
    {
        using iterator = typename IG::iterator;
        using const_iterator = typename IG::const_iterator;
        using reverse_iterator = std::reverse_iterator<iterator>;
        using const_reverse_iterator = std::reverse_iterator<const_iterator>;

        using traits = std::iterator_traits<iterator>;
        using const_traits = std::iterator_traits<const_iterator>;

        using value_type = std::common_type_t<typename traits::value_type, typename const_traits::value_type>;
        using reference = typename traits::reference;
        using const_reference = typename const_traits::reference;
        using pointer = typename traits::pointer;
        using const_pointer = typename const_traits::pointer;
        using difference_type = std::common_type_t<typename traits::difference_type, typename const_traits::difference_type>;
        using size_type = std::make_unsigned_t<difference_type>;
        using index_type = difference_type;
    };

    /**
     * Like xiterator_adaptor, but OWNS the container it iterates over
     * (moved in at construction).  The iterator-getter policy IG provides
     * static begin / cbegin / size functions used to (re)compute the cached
     * iterators whenever the container changes.
     */
    template <class C, class IG>
    class xiterator_owner_adaptor : public xbuffer_adaptor_base<xiterator_owner_adaptor<C, IG>>
    {
    public:

        using self_type = xiterator_owner_adaptor<C, IG>;
        using base_type = xbuffer_adaptor_base<self_type>;
        using value_type = typename base_type::value_type;
        using allocator_type = std::allocator<value_type>;
        using size_type = typename base_type::size_type;
        using iterator = typename base_type::iterator;
        using const_iterator = typename base_type::const_iterator;
        using temporary_type = uvector<value_type, allocator_type>;

        xiterator_owner_adaptor(C&& c);

        ~xiterator_owner_adaptor() = default;

        // Copy/move operations are user-provided: the cached iterators must
        // be recomputed so they refer to THIS object's container.
        xiterator_owner_adaptor(const self_type&);
        xiterator_owner_adaptor& operator=(const self_type&);

        xiterator_owner_adaptor(self_type&&);
        xiterator_owner_adaptor& operator=(self_type&&);

        xiterator_owner_adaptor& operator=(const temporary_type& rhs);
        xiterator_owner_adaptor& operator=(temporary_type&& rhs);

        size_type size() const noexcept;
        void resize(size_type size);

        iterator data() noexcept;
        const_iterator data() const noexcept;

        void swap(self_type& rhs) noexcept;

    private:

        // Recomputes m_it / m_cit / m_size from m_container via IG.
        void init_iterators();

        C m_container;
        iterator m_it;
        const_iterator m_cit;
        size_type m_size;
    };

    template <class C, class IG>
    void swap(xiterator_owner_adaptor<C, IG>& lhs, xiterator_owner_adaptor<C, IG>& rhs) noexcept;

    template <class C, class IG>
    struct is_contiguous_container<xiterator_owner_adaptor<C, IG>>
        : is_contiguous_container<typename IG::iterator>
    {
    };

    /**************************
     * make_xiterator_adaptor *
     **************************/

    // Builds an xiterator_adaptor (for lvalue containers) or an
    // xiterator_owner_adaptor (for rvalues) - see the builder at the end of
    // this file.
    template <class C, class IG>
    auto make_xiterator_adaptor(C&& container, IG iterator_getter);
+
+    /************************************
+     * temporary_container metafunction *
+     ************************************/
+
    // Maps a storage type to the container used for temporaries during
    // assignment.  Ordinary containers can serve as their own temporary;
    // the adaptors reference external memory, so they map to their
    // uvector-based temporary_type instead.
    template <class C>
    struct temporary_container
    {
        using type = C;
    };

    template <class CP, class O, class A>
    struct temporary_container<xbuffer_adaptor<CP, O, A>>
    {
        using type = typename xbuffer_adaptor<CP, O, A>::temporary_type;
    };

    template <class I, class CI>
    struct temporary_container<xiterator_adaptor<I, CI>>
    {
        using type = typename xiterator_adaptor<I, CI>::temporary_type;
    };

    template <class C, class IG>
    struct temporary_container<xiterator_owner_adaptor<C, IG>>
    {
        using type = typename xiterator_owner_adaptor<C, IG>::temporary_type;
    };

    template <class C>
    using temporary_container_t = typename temporary_container<C>::type;
+
+    /**********************************
+     * xbuffer_storage implementation *
+     **********************************/
+
+    namespace detail
+    {
+        template <class CP, class A>
+        inline xbuffer_storage<CP, A>::xbuffer_storage()
+            : p_data(nullptr)
+            , m_size(0)
+        {
+        }
+
+        template <class CP, class A>
+        template <class P>
+        inline xbuffer_storage<CP, A>::xbuffer_storage(P&& data, size_type size, const allocator_type&)
+            : p_data(std::forward<P>(data))
+            , m_size(size)
+        {
+        }
+
+        template <class CP, class A>
+        inline auto xbuffer_storage<CP, A>::size() const noexcept -> size_type
+        {
+            return m_size;
+        }
+
+        template <class CP, class A>
+        inline void xbuffer_storage<CP, A>::resize(size_type size)
+        {
+            if (size != m_size)
+            {
+                XTENSOR_THROW(std::runtime_error, "xbuffer_storage not resizable");
+            }
+        }
+
+        template <class CP, class A>
+        inline auto xbuffer_storage<CP, A>::data() noexcept -> pointer
+        {
+            return p_data;
+        }
+
+        template <class CP, class A>
+        inline auto xbuffer_storage<CP, A>::data() const noexcept -> const_pointer
+        {
+            return p_data;
+        }
+
+        template <class CP, class A>
+        inline void xbuffer_storage<CP, A>::swap(self_type& rhs) noexcept
+        {
+            using std::swap;
+            swap(p_data, rhs.p_data);
+            swap(m_size, rhs.m_size);
+        }
+
+        template <class CP, class A>
+        template <class P>
+        inline void xbuffer_storage<CP, A>::reset_data(P&& data, size_type size) noexcept
+        {
+            p_data = std::forward<P>(data);
+            m_size = size;
+        }
+    }
+
+    /****************************************
+     * xbuffer_owner_storage implementation *
+     ****************************************/
+
    namespace detail
    {
        // Takes ownership of the given buffer: it will be destroyed and
        // deallocated with the provided allocator in the destructor.
        template <class CP, class A>
        template <class P>
        inline xbuffer_owner_storage<CP, A>::xbuffer_owner_storage(P&& data, size_type size, const allocator_type& alloc)
            : m_data(std::forward<P>(data))
            , m_size(size)
            , m_moved_from(false)
            , m_allocator(alloc)
        {
        }

        // Skip the deallocation when ownership was transferred away by the
        // move constructor (m_moved_from is set there).
        template <class CP, class A>
        inline xbuffer_owner_storage<CP, A>::~xbuffer_owner_storage()
        {
            if (!m_moved_from)
            {
                safe_destroy_deallocate(m_allocator, m_data.get(), m_size);
                m_size = 0;
            }
        }

        // Copy assignment with strong exception safety: the new buffer is
        // fully built before the old one is released.
        template <class CP, class A>
        inline auto xbuffer_owner_storage<CP, A>::operator=(const self_type& rhs) -> self_type&
        {
            using std::swap;
            if (this != &rhs)
            {
                allocator_type al = std::allocator_traits<allocator_type>::select_on_container_copy_construction(
                    rhs.get_allocator()
                );
                pointer tmp = safe_init_allocate(al, rhs.m_size);
                // NOTE(review): safe_init_allocate presumably constructs the
                // elements for non-trivial value_type (making std::copy valid)
                // and leaves the storage raw for trivially default
                // constructible ones (hence uninitialized_copy) - confirm
                // against safe_init_allocate's definition.
                if (xtrivially_default_constructible<value_type>::value)
                {
                    std::uninitialized_copy(rhs.m_data.get(), rhs.m_data.get() + rhs.m_size, tmp);
                }
                else
                {
                    std::copy(rhs.m_data.get(), rhs.m_data.get() + rhs.m_size, tmp);
                }
                // After these swaps, tmp holds the OLD buffer and al the OLD
                // allocator, so the deallocation below releases the previous
                // contents with the allocator that created them, using the
                // not-yet-overwritten m_size.
                swap(m_data.get(), tmp);
                swap(m_allocator, al);
                safe_destroy_deallocate(al, tmp, m_size);
                m_size = rhs.m_size;
            }
            return *this;
        }

        // Move construction transfers ownership and flags the source so its
        // destructor does not free the (now shared) pointer.
        template <class CP, class A>
        inline xbuffer_owner_storage<CP, A>::xbuffer_owner_storage(self_type&& rhs)
            : m_data(std::move(rhs.m_data))
            , m_size(std::move(rhs.m_size))
            , m_moved_from(std::move(rhs.m_moved_from))
            , m_allocator(std::move(rhs.m_allocator))
        {
            rhs.m_moved_from = true;
            rhs.m_size = 0;
        }

        // Move assignment via swap: the old contents end up in rhs and are
        // released when rhs is destroyed.
        template <class CP, class A>
        inline auto xbuffer_owner_storage<CP, A>::operator=(self_type&& rhs) -> self_type&
        {
            swap(rhs);
            return *this;
        }

        template <class CP, class A>
        inline auto xbuffer_owner_storage<CP, A>::size() const noexcept -> size_type
        {
            return m_size;
        }

        // Allocates a new buffer of the requested size (contents are NOT
        // preserved) and releases the old one.  After swap(m_size, size),
        // `size` holds the previous length, so the old buffer (now in tmp)
        // is destroyed with its actual element count.
        template <class CP, class A>
        void xbuffer_owner_storage<CP, A>::resize(size_type size)
        {
            using std::swap;
            if (size != m_size)
            {
                pointer tmp = safe_init_allocate(m_allocator, size);
                swap(m_data.get(), tmp);
                swap(m_size, size);
                safe_destroy_deallocate(m_allocator, tmp, size);
            }
        }

        template <class CP, class A>
        inline auto xbuffer_owner_storage<CP, A>::data() noexcept -> pointer
        {
            return m_data.get();
        }

        template <class CP, class A>
        inline auto xbuffer_owner_storage<CP, A>::data() const noexcept -> const_pointer
        {
            return m_data.get();
        }

        // Returns a copy of the allocator, mirroring std::container interfaces.
        template <class CP, class A>
        inline auto xbuffer_owner_storage<CP, A>::get_allocator() const noexcept -> allocator_type
        {
            return allocator_type(m_allocator);
        }

        template <class CP, class A>
        inline void xbuffer_owner_storage<CP, A>::swap(self_type& rhs) noexcept
        {
            using std::swap;
            swap(m_data, rhs.m_data);
            swap(m_size, rhs.m_size);
            swap(m_allocator, rhs.m_allocator);
        }

        // Replaces the owned buffer: a temporary owner adopts the new data
        // and the swap leaves the OLD buffer in the temporary, which frees
        // it on destruction.
        template <class CP, class A>
        template <class P>
        inline void
        xbuffer_owner_storage<CP, A>::reset_data(P&& data, size_type size, const allocator_type& alloc) noexcept
        {
            xbuffer_owner_storage<CP, A> tmp(std::forward<P>(data), size, alloc);
            this->swap(tmp);
        }
    }
+
+    /****************************************
+     * xbuffer_smart_pointer implementation *
+     ****************************************/
+
+    namespace detail
+    {
+        template <class CP, class D>
+        template <class P, class DT>
+        xbuffer_smart_pointer<CP, D>::xbuffer_smart_pointer(P&& data_ptr, size_type size, DT&& destruct)
+            : p_data(data_ptr)
+            , m_size(size)
+            , m_destruct(std::forward<DT>(destruct))
+        {
+        }
+
+        template <class CP, class D>
+        auto xbuffer_smart_pointer<CP, D>::size() const noexcept -> size_type
+        {
+            return m_size;
+        }
+
+        template <class CP, class D>
+        void xbuffer_smart_pointer<CP, D>::resize(size_type size)
+        {
+            if (m_size != size)
+            {
+                XTENSOR_THROW(std::runtime_error, "xbuffer_storage not resizeable");
+            }
+        }
+
+        template <class CP, class D>
+        auto xbuffer_smart_pointer<CP, D>::data() noexcept -> pointer
+        {
+            return p_data;
+        }
+
+        template <class CP, class D>
+        auto xbuffer_smart_pointer<CP, D>::data() const noexcept -> const_pointer
+        {
+            return p_data;
+        }
+
+        template <class CP, class D>
+        void xbuffer_smart_pointer<CP, D>::swap(self_type& rhs) noexcept
+        {
+            using std::swap;
+            swap(p_data, rhs.p_data);
+            swap(m_size, rhs.m_size);
+            swap(m_destruct, rhs.m_destruct);
+        }
+
+        template <class CP, class D>
+        template <class P, class DT>
+        void xbuffer_smart_pointer<CP, D>::reset_data(P&& data, size_type size, DT&& destruct) noexcept
+        {
+            p_data = std::forward<P>(data);
+            m_size = size;
+            m_destruct = destruct;
+        }
+    }
+
+    /***************************************
+     * xbuffer_adaptor_base implementation *
+     ***************************************/
+
+    template <class D>
+    inline bool xbuffer_adaptor_base<D>::empty() const noexcept
+    {
+        return derived_cast().size() == size_type(0);
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::operator[](size_type i) -> reference
+    {
+        return derived_cast().data()[static_cast<index_type>(i)];
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::operator[](size_type i) const -> const_reference
+    {
+        return derived_cast().data()[static_cast<index_type>(i)];
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::front() -> reference
+    {
+        return this->operator[](0);
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::front() const -> const_reference
+    {
+        return this->operator[](0);
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::back() -> reference
+    {
+        return this->operator[](derived_cast().size() - 1);
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::back() const -> const_reference
+    {
+        return this->operator[](derived_cast().size() - 1);
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::begin() noexcept -> iterator
+    {
+        return derived_cast().data();
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::end() noexcept -> iterator
+    {
+        return derived_cast().data() + static_cast<index_type>(derived_cast().size());
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::begin() const noexcept -> const_iterator
+    {
+        return derived_cast().data();
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::end() const noexcept -> const_iterator
+    {
+        return derived_cast().data() + static_cast<index_type>(derived_cast().size());
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::cbegin() const noexcept -> const_iterator
+    {
+        return begin();
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::cend() const noexcept -> const_iterator
+    {
+        return end();
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::rbegin() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(end());
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::rend() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(begin());
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::rbegin() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(end());
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::rend() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(begin());
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::crbegin() const noexcept -> const_reverse_iterator
+    {
+        return rbegin();
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::crend() const noexcept -> const_reverse_iterator
+    {
+        return rend();
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::derived_cast() noexcept -> derived_type&
+    {
+        return *static_cast<derived_type*>(this);
+    }
+
+    template <class D>
+    inline auto xbuffer_adaptor_base<D>::derived_cast() const noexcept -> const derived_type&
+    {
+        return *static_cast<const derived_type*>(this);
+    }
+
+    template <class D>
+    inline bool operator==(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs)
+    {
+        return lhs.derived_cast().size() == rhs.derived_cast().size()
+               && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+    }
+
+    template <class D>
+    inline bool operator!=(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    template <class D>
+    inline bool operator<(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs)
+    {
+        return std::lexicographical_compare(
+            lhs.begin(),
+            lhs.end(),
+            rhs.begin(),
+            rhs.end(),
+            std::less<typename D::value_type>()
+        );
+    }
+
+    template <class D>
+    inline bool operator<=(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs)
+    {
+        return std::lexicographical_compare(
+            lhs.begin(),
+            lhs.end(),
+            rhs.begin(),
+            rhs.end(),
+            std::less_equal<typename D::value_type>()
+        );
+    }
+
+    template <class D>
+    inline bool operator>(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs)
+    {
+        return std::lexicographical_compare(
+            lhs.begin(),
+            lhs.end(),
+            rhs.begin(),
+            rhs.end(),
+            std::greater<typename D::value_type>()
+        );
+    }
+
+    template <class D>
+    inline bool operator>=(const xbuffer_adaptor_base<D>& lhs, const xbuffer_adaptor_base<D>& rhs)
+    {
+        return std::lexicographical_compare(
+            lhs.begin(),
+            lhs.end(),
+            rhs.begin(),
+            rhs.end(),
+            std::greater_equal<typename D::value_type>()
+        );
+    }
+
+    /**********************************
+     * xbuffer_adaptor implementation *
+     **********************************/
+
+    template <class CP, class O, class A>
+    inline auto xbuffer_adaptor<CP, O, A>::operator=(temporary_type&& tmp) -> self_type&
+    {
+        base_type::resize(tmp.size());
+        std::copy(tmp.cbegin(), tmp.cend(), this->begin());
+        return *this;
+    }
+
+    template <class CP, class O, class A>
+    inline void swap(xbuffer_adaptor<CP, O, A>& lhs, xbuffer_adaptor<CP, O, A>& rhs) noexcept
+    {
+        lhs.swap(rhs);
+    }
+
+    /************************************
+     * xiterator_adaptor implementation *
+     ************************************/
+
+    template <class I, class CI>
+    inline xiterator_adaptor<I, CI>::xiterator_adaptor(I it, CI cit, size_type size)
+        : m_it(it)
+        , m_cit(cit)
+        , m_size(size)
+    {
+    }
+
+    template <class I, class CI>
+    inline auto xiterator_adaptor<I, CI>::operator=(const temporary_type& rhs) -> self_type&
+    {
+        resize(rhs.size());
+        std::copy(rhs.cbegin(), rhs.cend(), m_it);
+        return *this;
+    }
+
+    template <class I, class CI>
+    inline auto xiterator_adaptor<I, CI>::operator=(temporary_type&& rhs) -> self_type&
+    {
+        return (*this = rhs);
+    }
+
+    template <class I, class CI>
+    inline auto xiterator_adaptor<I, CI>::size() const noexcept -> size_type
+    {
+        return m_size;
+    }
+
+    template <class I, class CI>
+    inline void xiterator_adaptor<I, CI>::resize(size_type size)
+    {
+        if (m_size != size)
+        {
+            XTENSOR_THROW(std::runtime_error, "xiterator_adaptor not resizeable");
+        }
+    }
+
+    template <class I, class CI>
+    inline auto xiterator_adaptor<I, CI>::data() noexcept -> iterator
+    {
+        return m_it;
+    }
+
+    template <class I, class CI>
+    inline auto xiterator_adaptor<I, CI>::data() const noexcept -> const_iterator
+    {
+        return m_cit;
+    }
+
+    template <class I, class CI>
+    inline void xiterator_adaptor<I, CI>::swap(self_type& rhs) noexcept
+    {
+        using std::swap;
+        swap(m_it, rhs.m_it);
+        swap(m_cit, rhs.m_cit);
+        swap(m_size, rhs.m_size);
+    }
+
+    template <class I, class CI>
+    inline void swap(xiterator_adaptor<I, CI>& lhs, xiterator_adaptor<I, CI>& rhs) noexcept
+    {
+        lhs.swap(rhs);
+    }
+
+    /******************************************
+     * xiterator_owner_adaptor implementation *
+     ******************************************/
+
    // Takes ownership of the container, then caches begin / cbegin / size
    // through the iterator-getter policy IG.
    template <class C, class IG>
    inline xiterator_owner_adaptor<C, IG>::xiterator_owner_adaptor(C&& c)
        : m_container(std::move(c))
    {
        init_iterators();
    }

    // Copying copies the container; the iterators are recomputed so they
    // refer to THIS object's container rather than to rhs's.
    template <class C, class IG>
    inline xiterator_owner_adaptor<C, IG>::xiterator_owner_adaptor(const self_type& rhs)
        : m_container(rhs.m_container)
    {
        init_iterators();
    }
+
+    template <class C, class IG>
+    inline xiterator_owner_adaptor<C, IG>& xiterator_owner_adaptor<C, IG>::operator=(const self_type& rhs)
+    {
+        m_container = rhs.m_container;
+        init_iterators();
+    }
+
    // Moving steals the container; iterators are recomputed because
    // iterators obtained from rhs's container may be invalidated by the
    // move (they must be regenerated from m_container).
    template <class C, class IG>
    inline xiterator_owner_adaptor<C, IG>::xiterator_owner_adaptor(self_type&& rhs)
        : m_container(std::move(rhs.m_container))
    {
        init_iterators();
    }
+
+    template <class C, class IG>
+    inline xiterator_owner_adaptor<C, IG>& xiterator_owner_adaptor<C, IG>::operator=(self_type&& rhs)
+    {
+        m_container = std::move(rhs.m_container);
+        init_iterators();
+    }
+
    // Copies the temporary's elements into the owned container's range;
    // sizes must match (resize throws otherwise).
    template <class C, class IG>
    inline xiterator_owner_adaptor<C, IG>& xiterator_owner_adaptor<C, IG>::operator=(const temporary_type& rhs)
    {
        resize(rhs.size());
        std::copy(rhs.cbegin(), rhs.cend(), m_it);
        return *this;
    }

    // Delegates to the copy overload: the temporary's buffer cannot be
    // adopted through the cached iterators.
    template <class C, class IG>
    inline xiterator_owner_adaptor<C, IG>& xiterator_owner_adaptor<C, IG>::operator=(temporary_type&& rhs)
    {
        return (*this = rhs);
    }

    template <class C, class IG>
    inline auto xiterator_owner_adaptor<C, IG>::size() const noexcept -> size_type
    {
        return m_size;
    }

    // Only a no-op "resize" to the current size is accepted.
    template <class C, class IG>
    inline void xiterator_owner_adaptor<C, IG>::resize(size_type size)
    {
        if (m_size != size)
        {
            XTENSOR_THROW(std::runtime_error, "xiterator_owner_adaptor not resizeable");
        }
    }

    template <class C, class IG>
    inline auto xiterator_owner_adaptor<C, IG>::data() noexcept -> iterator
    {
        return m_it;
    }

    template <class C, class IG>
    inline auto xiterator_owner_adaptor<C, IG>::data() const noexcept -> const_iterator
    {
        return m_cit;
    }

    // Swap exchanges the containers, then rebuilds BOTH adaptors' cached
    // iterators and sizes: the pre-swap iterators would refer into the
    // other object's container.
    template <class C, class IG>
    inline void xiterator_owner_adaptor<C, IG>::swap(self_type& rhs) noexcept
    {
        using std::swap;
        swap(m_container, rhs.m_container);
        init_iterators();
        rhs.init_iterators();
    }

    // Recomputes the cached iterators and size from the owned container
    // via the iterator-getter policy.
    template <class C, class IG>
    inline void xiterator_owner_adaptor<C, IG>::init_iterators()
    {
        m_it = IG::begin(m_container);
        m_cit = IG::cbegin(m_container);
        m_size = IG::size(m_container);
    }

    // Free swap for ADL.
    template <class C, class IG>
    inline void swap(xiterator_owner_adaptor<C, IG>& lhs, xiterator_owner_adaptor<C, IG>& rhs) noexcept
    {
        lhs.swap(rhs);
    }
+
+    /*****************************************
+     * make_xiterator_adaptor implementation *
+     *****************************************/
+
    namespace detail
    {
        // Primary template, selected when C is an lvalue reference: the
        // caller keeps the container, so build a non-owning
        // xiterator_adaptor over it.
        template <class C, class IG, bool = std::is_lvalue_reference<C>::value>
        struct xiterator_adaptor_builder
        {
            using iterator = decltype(IG::begin(std::declval<C>()));
            using const_iterator = decltype(IG::cbegin(std::declval<C>()));
            using type = xiterator_adaptor<iterator, const_iterator>;

            inline static type build(C& c)
            {
                return type(IG::begin(c), IG::cbegin(c), IG::size(c));
            }
        };

        // Specialization for rvalues: move the container into an owning
        // adaptor.
        template <class C, class IG>
        struct xiterator_adaptor_builder<C, IG, false>
        {
            using type = xiterator_owner_adaptor<C, IG>;

            inline static type build(C&& c)
            {
                return type(std::move(c));
            }
        };
    }

    // Builds the appropriate iterator adaptor for the container, dispatching
    // on its value category (lvalue -> non-owning, rvalue -> owning).  The
    // iterator-getter argument is used for type deduction only.
    template <class C, class IG>
    inline auto make_xiterator_adaptor(C&& container, IG)
    {
        using builder_type = detail::xiterator_adaptor_builder<C, IG>;
        return builder_type::build(std::forward<C>(container));
    }
+}
+
+#endif

+ 1261 - 0
3rd/numpy/include/xtensor/xbuilder.hpp

@@ -0,0 +1,1261 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
/**
 * @brief builder functions creating xexpressions
 *        (ones, zeros, empty, arange, linspace, eye, concatenate, stack, ...)
 */
+
+#ifndef XTENSOR_BUILDER_HPP
+#define XTENSOR_BUILDER_HPP
+
+#include <array>
+#include <chrono>
+#include <cmath>
+#include <cstddef>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include <xtl/xclosure.hpp>
+#include <xtl/xsequence.hpp>
+#include <xtl/xtype_traits.hpp>
+
+#include "xbroadcast.hpp"
+#include "xfunction.hpp"
+#include "xgenerator.hpp"
+#include "xoperation.hpp"
+
+namespace xt
+{
+
+    /********
+     * ones *
+     ********/
+
    /**
     * Returns an \ref xexpression containing ones of the specified shape.
     * @param shape the shape of the returned expression
     * @tparam T the value type of the returned expression
     */
    template <class T, class S>
    inline auto ones(S shape) noexcept
    {
        return broadcast(T(1), std::forward<S>(shape));
    }

    // Overload for built-in array shapes, e.g. ones<double>({2, 3}).
    template <class T, class I, std::size_t L>
    inline auto ones(const I (&shape)[L]) noexcept
    {
        return broadcast(T(1), shape);
    }
+
+    /*********
+     * zeros *
+     *********/
+
    /**
     * Returns an \ref xexpression containing zeros of the specified shape.
     * @param shape the shape of the returned expression
     * @tparam T the value type of the returned expression
     */
    template <class T, class S>
    inline auto zeros(S shape) noexcept
    {
        return broadcast(T(0), std::forward<S>(shape));
    }

    // Overload for built-in array shapes, e.g. zeros<double>({2, 3}).
    template <class T, class I, std::size_t L>
    inline auto zeros(const I (&shape)[L]) noexcept
    {
        return broadcast(T(0), shape);
    }
+
    /**
     * Create a xcontainer (xarray, xtensor or xtensor_fixed) with uninitialized values,
     * value_type T and the given shape. Selects the best container match automatically
     * from the supplied shape.
     *
     * - ``std::vector`` → ``xarray<T>``
     * - ``std::array`` or ``initializer_list`` → ``xtensor<T, N>``
     * - ``xshape<N...>`` → ``xtensor_fixed<T, xshape<N...>>``
     *
     * @param shape shape of the new xcontainer
     */
    template <class T, layout_type L = XTENSOR_DEFAULT_LAYOUT, class S>
    inline xarray<T, L> empty(const S& shape)
    {
        return xarray<T, L>::from_shape(shape);
    }

    // Overload for std::array shapes: the rank N is known at compile time,
    // so an xtensor<T, N> is returned.
    template <class T, layout_type L = XTENSOR_DEFAULT_LAYOUT, class ST, std::size_t N>
    inline xtensor<T, N, L> empty(const std::array<ST, N>& shape)
    {
        using shape_type = typename xtensor<T, N>::shape_type;
        return xtensor<T, N, L>(xtl::forward_sequence<shape_type, decltype(shape)>(shape));
    }

    // Overload for built-in array shapes (brace-enclosed initializer lists).
    template <class T, layout_type L = XTENSOR_DEFAULT_LAYOUT, class I, std::size_t N>
    inline xtensor<T, N, L> empty(const I (&shape)[N])
    {
        using shape_type = typename xtensor<T, N>::shape_type;
        return xtensor<T, N, L>(xtl::forward_sequence<shape_type, decltype(shape)>(shape));
    }

    // Overload for fixed shapes: the full shape is known at compile time,
    // so an xtensor_fixed is returned.
    template <class T, layout_type L = XTENSOR_DEFAULT_LAYOUT, std::size_t... N>
    inline xtensor_fixed<T, fixed_shape<N...>, L> empty(const fixed_shape<N...>& /*shape*/)
    {
        return xtensor_fixed<T, fixed_shape<N...>, L>();
    }
+
+    /**
+     * Create a xcontainer (xarray, xtensor or xtensor_fixed) with uninitialized values of
+     * the same shape, value type and layout as the input xexpression *e*.
+     *
+     * @param e the xexpression from which to extract shape, value type and layout.
+     */
+    template <class E>
+    inline auto empty_like(const xexpression<E>& e)
+    {
+        using xtype = temporary_type_t<E>;
+        auto res = xtype::from_shape(e.derived_cast().shape());
+        return res;
+    }
+
    /**
     * Create a xcontainer (xarray, xtensor or xtensor_fixed), filled with *fill_value* and of
     * the same shape, value type and layout as the input xexpression *e*.
     *
     * @param e the xexpression from which to extract shape, value type and layout.
     * @param fill_value the value used to set each element of the returned xcontainer.
     */
    template <class E>
    inline auto full_like(const xexpression<E>& e, typename E::value_type fill_value)
    {
        using xtype = temporary_type_t<E>;
        auto res = xtype::from_shape(e.derived_cast().shape());
        res.fill(fill_value);
        return res;
    }

    /**
     * Create a xcontainer (xarray, xtensor or xtensor_fixed), filled with zeros and of
     * the same shape, value type and layout as the input xexpression *e*.
     *
     * Note: contrary to zeros(shape), this function returns a non-lazy, allocated container!
     * Use ``xt::zeros<double>(e.shape());`` for a lazy version.
     *
     * @param e the xexpression from which to extract shape, value type and layout.
     */
    template <class E>
    inline auto zeros_like(const xexpression<E>& e)
    {
        return full_like(e, typename E::value_type(0));
    }

    /**
     * Create a xcontainer (xarray, xtensor or xtensor_fixed), filled with ones and of
     * the same shape, value type and layout as the input xexpression *e*.
     *
     * Note: contrary to ones(shape), this function returns a non-lazy, allocated container!
     * Use ``xt::ones<double>(e.shape());`` for a lazy version.
     *
     * @param e the xexpression from which to extract shape, value type and layout.
     */
    template <class E>
    inline auto ones_like(const xexpression<E>& e)
    {
        return full_like(e, typename E::value_type(1));
    }
+
+    namespace detail
+    {
        // Type used when multiplying a step of type S by an index: T by
        // default, but the duration's representation type R when S is a
        // std::chrono::duration (a duration cannot be multiplied by another
        // duration).
        template <class T, class S>
        struct get_mult_type_impl
        {
            using type = T;
        };

        template <class T, class R, class P>
        struct get_mult_type_impl<T, std::chrono::duration<R, P>>
        {
            using type = R;
        };

        template <class T, class S>
        using get_mult_type = typename get_mult_type_impl<T, S>::type;
+
        // These methods should be private methods of arange_generator, however this
        // leads to an ICE on VS2015.

        // Eager assignment for integral step types: fills the destination
        // storage by repeated addition of the step. The stop/endpoint
        // arguments are unused for the integral case.
        template <class R, class E, class U, class X, XTL_REQUIRES(xtl::is_integral<X>)>
        inline void arange_assign_to(xexpression<E>& e, U start, U, X step, bool) noexcept
        {
            auto& de = e.derived_cast();
            U value = start;

            for (auto&& el : de.storage())
            {
                el = static_cast<R>(value);
                value += step;
            }
        }

        // Eager assignment for non-integral step types: computes
        // start + i * step for each index, and, when `endpoint` is set
        // (linspace semantics), forces the last element to exactly `stop`
        // so accumulated rounding does not shift the endpoint.
        template <class R, class E, class U, class X, XTL_REQUIRES(xtl::negation<xtl::is_integral<X>>)>
        inline void arange_assign_to(xexpression<E>& e, U start, U stop, X step, bool endpoint) noexcept
        {
            auto& buf = e.derived_cast().storage();
            using size_type = decltype(buf.size());
            using mult_type = get_mult_type<U, X>;
            size_type num = buf.size();
            for (size_type i = 0; i < num; ++i)
            {
                buf[i] = static_cast<R>(start + step * mult_type(i));
            }
            if (endpoint && num > 1)
            {
                buf[num - 1] = static_cast<R>(stop);
            }
        }
+
        // Generator producing the values of arange / linspace on access.
        // T is the computation type, R the value type exposed to callers,
        // S the step type. When `endpoint` is true (linspace), the element
        // at index num_steps - 1 evaluates exactly to `stop`.
        template <class T, class R = T, class S = T>
        class arange_generator
        {
        public:

            using value_type = R;
            using step_type = S;

            arange_generator(T start, T stop, S step, size_t num_steps, bool endpoint = false)
                : m_start(start)
                , m_stop(stop)
                , m_step(step)
                , m_num_steps(num_steps)
                , m_endpoint(endpoint)
            {
            }

            template <class... Args>
            inline R operator()(Args... args) const
            {
                return access_impl(args...);
            }

            // Only the first index is used: the generated expression is 1-D,
            // trailing indices (from broadcasting) are ignored.
            template <class It>
            inline R element(It first, It) const
            {
                return access_impl(*first);
            }

            // Eager assignment path; see arange_assign_to above.
            template <class E>
            inline void assign_to(xexpression<E>& e) const noexcept
            {
                arange_assign_to<R>(e, m_start, m_stop, m_step, m_endpoint);
            }

        private:

            T m_start;
            T m_stop;
            step_type m_step;
            size_t m_num_steps;
            bool m_endpoint;  // true for setting the last element to m_stop

            template <class T1, class... Args>
            inline R access_impl(T1 t, Args...) const
            {
                if (m_endpoint && m_num_steps > 1 && size_t(t) == m_num_steps - 1)
                {
                    return static_cast<R>(m_stop);
                }
                // Avoids warning when T = char (because char + char => int!)
                using mult_type = get_mult_type<T, S>;
                return static_cast<R>(m_start + m_step * mult_type(t));
            }

            // 0-d access yields the start value.
            inline R access_impl() const
            {
                return static_cast<R>(m_start);
            }
        };
+
        // Trait helpers classifying the (value, step) type pair for arange
        // overload selection.
        template <class T, class S>
        using both_integer = xtl::conjunction<xtl::is_integral<T>, xtl::is_integral<S>>;

        template <class T, class S>
        using integer_with_signed_integer = xtl::conjunction<both_integer<T, S>, xtl::is_signed<S>>;

        template <class T, class S>
        using integer_with_unsigned_integer = xtl::conjunction<both_integer<T, S>, std::is_unsigned<S>>;

        // Floating-point (or mixed) arange: the element count is
        // ceil((stop - start) / step).
        template <class T, class S = T, XTL_REQUIRES(xtl::negation<both_integer<T, S>>)>
        inline auto arange_impl(T start, T stop, S step = 1) noexcept
        {
            std::size_t shape = static_cast<std::size_t>(std::ceil((stop - start) / step));
            return detail::make_xgenerator(detail::arange_generator<T, T, S>(start, stop, step, shape), {shape});
        }

        // Integer arange with signed step: element count via integer ceiling
        // division, handling ascending (step > 0) and descending (step < 0)
        // ranges; a range whose sign does not match the step is empty.
        template <class T, class S = T, XTL_REQUIRES(integer_with_signed_integer<T, S>)>
        inline auto arange_impl(T start, T stop, S step = 1) noexcept
        {
            bool empty_cond = (stop - start) / step <= 0;
            std::size_t shape = 0;
            if (!empty_cond)
            {
                shape = stop > start ? static_cast<std::size_t>((stop - start + step - S(1)) / step)
                                     : static_cast<std::size_t>((start - stop - step - S(1)) / -step);
            }
            return detail::make_xgenerator(detail::arange_generator<T, T, S>(start, stop, step, shape), {shape});
        }

        // Integer arange with unsigned step: only ascending ranges are
        // possible, so stop <= start yields an empty result.
        template <class T, class S = T, XTL_REQUIRES(integer_with_unsigned_integer<T, S>)>
        inline auto arange_impl(T start, T stop, S step = 1) noexcept
        {
            bool empty_cond = stop <= start;
            std::size_t shape = 0;
            if (!empty_cond)
            {
                shape = static_cast<std::size_t>((stop - start + step - S(1)) / step);
            }
            return detail::make_xgenerator(detail::arange_generator<T, T, S>(start, stop, step, shape), {shape});
        }
+
+        template <class F>
+        class fn_impl
+        {
+        public:
+
+            using value_type = typename F::value_type;
+            using size_type = std::size_t;
+
+            fn_impl(F&& f)
+                : m_ft(f)
+            {
+            }
+
+            inline value_type operator()() const
+            {
+                size_type idx[1] = {0ul};
+                return access_impl(std::begin(idx), std::end(idx));
+            }
+
+            template <class... Args>
+            inline value_type operator()(Args... args) const
+            {
+                size_type idx[sizeof...(Args)] = {static_cast<size_type>(args)...};
+                return access_impl(std::begin(idx), std::end(idx));
+            }
+
+            template <class It>
+            inline value_type element(It first, It last) const
+            {
+                return access_impl(first, last);
+            }
+
+        private:
+
+            F m_ft;
+
+            template <class It>
+            inline value_type access_impl(const It& begin, const It& end) const
+            {
+                return m_ft(begin, end);
+            }
+        };
+
+        template <class T>
+        class eye_fn
+        {
+        public:
+
+            using value_type = T;
+
+            eye_fn(int k)
+                : m_k(k)
+            {
+            }
+
+            template <class It>
+            inline T operator()(const It& /*begin*/, const It& end) const
+            {
+                using lvalue_type = typename std::iterator_traits<It>::value_type;
+                return *(end - 1) == *(end - 2) + static_cast<lvalue_type>(m_k) ? T(1) : T(0);
+            }
+
+        private:
+
+            std::ptrdiff_t m_k;
+        };
+    }
+
    /**
     * Generates an array with ones on the diagonal.
     * Only the last two dimensions of @a shape take part in the diagonal
     * test (see detail::eye_fn).
     * @param shape shape of the resulting expression
     * @param k index of the diagonal. 0 (default) refers to the main diagonal,
     *          a positive value refers to an upper diagonal, and a negative
     *          value to a lower diagonal.
     * @tparam T value_type of xexpression
     * @return xgenerator that generates the values on access
     */
    template <class T = bool>
    inline auto eye(const std::vector<std::size_t>& shape, int k = 0)
    {
        return detail::make_xgenerator(detail::fn_impl<detail::eye_fn<T>>(detail::eye_fn<T>(k)), shape);
    }

    /**
     * Generates a (n x n) array with ones on the diagonal.
     * @param n length of the diagonal.
     * @param k index of the diagonal. 0 (default) refers to the main diagonal,
     *          a positive value refers to an upper diagonal, and a negative
     *          value to a lower diagonal.
     * @tparam T value_type of xexpression
     * @return xgenerator that generates the values on access
     */
    template <class T = bool>
    inline auto eye(std::size_t n, int k = 0)
    {
        return eye<T>({n, n}, k);
    }
+
    /**
     * Generates numbers evenly spaced within given half-open interval [start, stop).
     * @param start start of the interval
     * @param stop stop of the interval
     * @param step stepsize (may be negative for descending ranges when the
     *             step type is signed)
     * @tparam T value_type of xexpression
     * @return xgenerator that generates the values on access
     */
    template <class T, class S = T>
    inline auto arange(T start, T stop, S step = 1) noexcept
    {
        return detail::arange_impl(start, stop, step);
    }

    /**
     * Generate numbers evenly spaced within given half-open interval [0, stop)
     * with a step size of 1.
     * @param stop stop of the interval
     * @tparam T value_type of xexpression
     * @return xgenerator that generates the values on access
     */
    template <class T>
    inline auto arange(T stop) noexcept
    {
        return arange<T>(T(0), stop, T(1));
    }
+
    /**
     * Generates @a num_samples evenly spaced numbers over given interval
     * @param start start of interval
     * @param stop stop of interval
     * @param num_samples number of samples (defaults to 50)
     * @param endpoint if true, include endpoint (defaults to true)
     * @tparam T value_type of xexpression
     * @return xgenerator that generates the values on access
     */
    template <class T>
    inline auto linspace(T start, T stop, std::size_t num_samples = 50, bool endpoint = true) noexcept
    {
        using fp_type = std::common_type_t<T, double>;
        // fmax(1, ...) guards against dividing by zero when num_samples <= 1.
        // NOTE(review): num_samples == 0 with endpoint == true wraps the
        // unsigned subtraction; the result is still an empty expression, but
        // the computed step is meaningless — confirm intended.
        fp_type step = fp_type(stop - start) / std::fmax(fp_type(1), fp_type(num_samples - (endpoint ? 1 : 0)));
        return detail::make_xgenerator(
            detail::arange_generator<fp_type, T>(fp_type(start), fp_type(stop), step, num_samples, endpoint),
            {num_samples}
        );
    }

    /**
     * Generates @a num_samples numbers evenly spaced on a log scale over given interval
     * @param start start of interval (pow(base, start) is the first value).
     * @param stop stop of interval (pow(base, stop) is the final value, except if endpoint = false)
     * @param num_samples number of samples (defaults to 50)
     * @param base the base of the log space.
     * @param endpoint if true, include endpoint (defaults to true)
     * @tparam T value_type of xexpression
     * @return xgenerator that generates the values on access
     */
    template <class T>
    inline auto logspace(T start, T stop, std::size_t num_samples, T base = 10, bool endpoint = true) noexcept
    {
        return pow(std::move(base), linspace(start, stop, num_samples, endpoint));
    }
+
+    namespace detail
+    {
        // Element access policy for concatenate: locates the input
        // expression owning the requested coordinate along `axis` and reads
        // the element from it.
        template <class... CT>
        class concatenate_access
        {
        public:

            using tuple_type = std::tuple<CT...>;
            using size_type = std::size_t;
            using value_type = xtl::promote_type_t<typename std::decay_t<CT>::value_type...>;

            template <class It>
            inline value_type access(const tuple_type& t, size_type axis, It first, It last) const
            {
                // trim off extra indices if provided to match behavior of containers
                auto dim_offset = std::distance(first, last) - std::get<0>(t).dimension();
                size_t axis_dim = *(first + axis + dim_offset);
                // Walks the inputs in tuple order, subtracting each one's
                // extent along `axis` until the owning expression is found.
                auto match = [&](auto& arr)
                {
                    if (axis_dim >= arr.shape()[axis])
                    {
                        axis_dim -= arr.shape()[axis];
                        return false;
                    }
                    return true;
                };

                // Computes the linear offset of the element inside the
                // matched input and dereferences its begin() iterator.
                // NOTE(review): the stride computation assumes the iteration
                // order of arr.begin() matches row-major strides — confirm
                // for column-major inputs.
                auto get = [&](auto& arr)
                {
                    size_t offset = 0;
                    const size_t end = arr.dimension();
                    for (size_t i = 0; i < end; i++)
                    {
                        const auto& shape = arr.shape();
                        const size_t stride = std::accumulate(
                            shape.begin() + i + 1,
                            shape.end(),
                            1,
                            std::multiplies<size_t>()
                        );
                        if (i == axis)
                        {
                            offset += axis_dim * stride;
                        }
                        else
                        {
                            const auto len = (*(first + i + dim_offset));
                            offset += len * stride;
                        }
                    }
                    const auto element = arr.begin() + offset;
                    return *element;
                };

                size_type i = 0;
                for (; i < sizeof...(CT); ++i)
                {
                    if (apply<bool>(i, match, t))
                    {
                        break;
                    }
                }
                return apply<value_type>(i, get, t);
            }
        };
+
        // Element access policy for stack: the coordinate along `axis`
        // selects which input expression to read; the remaining coordinates
        // address the element inside it.
        template <class... CT>
        class stack_access
        {
        public:

            using tuple_type = std::tuple<CT...>;
            using size_type = std::size_t;
            using value_type = xtl::promote_type_t<typename std::decay_t<CT>::value_type...>;

            template <class It>
            inline value_type access(const tuple_type& t, size_type axis, It first, It) const
            {
                // Computes the linear offset inside the selected input,
                // skipping the stacked axis in the incoming index sequence:
                // once i reaches `axis`, subsequent reads are shifted by one.
                // NOTE(review): assumes row-major iteration order of
                // arr.begin(), as in concatenate_access — confirm.
                auto get_item = [&](auto& arr)
                {
                    size_t offset = 0;
                    const size_t end = arr.dimension();
                    size_t after_axis = 0;
                    for (size_t i = 0; i < end; i++)
                    {
                        if (i == axis)
                        {
                            after_axis = 1;
                        }
                        const auto& shape = arr.shape();
                        const size_t stride = std::accumulate(
                            shape.begin() + i + 1,
                            shape.end(),
                            1,
                            std::multiplies<size_t>()
                        );
                        const auto len = (*(first + i + after_axis));
                        offset += len * stride;
                    }
                    const auto element = arr.begin() + offset;
                    return *element;
                };
                size_type i = *(first + axis);
                return apply<value_type>(i, get_item, t);
            }
        };
+
+        template <class... CT>
+        class vstack_access
+        {
+        public:
+
+            using tuple_type = std::tuple<CT...>;
+            using size_type = std::size_t;
+            using value_type = xtl::promote_type_t<typename std::decay_t<CT>::value_type...>;
+
+            template <class It>
+            inline value_type access(const tuple_type& t, size_type axis, It first, It last) const
+            {
+                if (std::get<0>(t).dimension() == 1)
+                {
+                    return stack.access(t, axis, first, last);
+                }
+                else
+                {
+                    return concatonate.access(t, axis, first, last);
+                }
+            }
+
+        private:
+
+            concatenate_access<CT...> concatonate;
+            stack_access<CT...> stack;
+        };
+
        // Shared call interface for the concatenate/stack/vstack generators:
        // stores the input tuple and the axis, and forwards index accesses
        // to the access policy F (concatenate_access, stack_access, ...).
        template <template <class...> class F, class... CT>
        class concatenate_invoker
        {
        public:

            using tuple_type = std::tuple<CT...>;
            using size_type = std::size_t;
            using value_type = xtl::promote_type_t<typename std::decay_t<CT>::value_type...>;

            inline concatenate_invoker(tuple_type&& t, size_type axis)
                : m_t(std::move(t))
                , m_axis(axis)
            {
            }

            template <class... Args>
            inline value_type operator()(Args... args) const
            {
                // TODO: avoid memory allocation
                xindex index({static_cast<size_type>(args)...});
                return access_method.access(m_t, m_axis, index.begin(), index.end());
            }

            template <class It>
            inline value_type element(It first, It last) const
            {
                return access_method.access(m_t, m_axis, first, last);
            }

        private:

            F<CT...> access_method;
            tuple_type m_t;
            size_type m_axis;
        };

        // Convenience aliases binding each access policy to the invoker.
        template <class... CT>
        using concatenate_impl = concatenate_invoker<concatenate_access, CT...>;

        template <class... CT>
        using stack_impl = concatenate_invoker<stack_access, CT...>;

        template <class... CT>
        using vstack_impl = concatenate_invoker<vstack_access, CT...>;
+
        // Access policy broadcasting a source expression along every axis
        // except m_axis: only the coordinate along m_axis is forwarded to
        // the (1-D) source.
        template <class CT>
        class repeat_impl
        {
        public:

            using xexpression_type = std::decay_t<CT>;
            using size_type = typename xexpression_type::size_type;
            using value_type = typename xexpression_type::value_type;

            template <class CTA>
            repeat_impl(CTA&& source, size_type axis)
                : m_source(std::forward<CTA>(source))
                , m_axis(axis)
            {
            }

            // NOTE(review): assumes at least m_axis + 1 indices are passed;
            // a zero-argument call would index an empty array — confirm
            // callers guarantee this.
            template <class... Args>
            value_type operator()(Args... args) const
            {
                std::array<size_type, sizeof...(Args)> args_arr = {static_cast<size_type>(args)...};
                return m_source(args_arr[m_axis]);
            }

            template <class It>
            inline value_type element(It first, It) const
            {
                return m_source(*(first + static_cast<std::ptrdiff_t>(m_axis)));
            }

        private:

            CT m_source;
            size_type m_axis;
        };
+    }
+
    /**
     * @brief Creates tuples from arguments for \ref concatenate and \ref stack.
     *        Very similar to std::make_tuple, but stores each argument
     *        through xtl::const_closure_type_t rather than by decayed value.
     */
    template <class... Types>
    inline auto xtuple(Types&&... args)
    {
        return std::tuple<xtl::const_closure_type_t<Types>...>(std::forward<Types>(args)...);
    }
+
+    namespace detail
+    {
        // Compile-time conjunction over a pack of bools.
        template <bool... values>
        using all_true = xtl::conjunction<std::integral_constant<bool, values>...>;

        // Compile-time concatenation of two fixed shapes X and Y along
        // `axis`: every extent except the one at `axis` must match, and the
        // result's extent at `axis` is the sum of both inputs'.
        template <class X, class Y, std::size_t axis, class AxesSequence>
        struct concat_fixed_shape_impl;

        template <class X, class Y, std::size_t axis, std::size_t... Is>
        struct concat_fixed_shape_impl<X, Y, axis, std::index_sequence<Is...>>
        {
            static_assert(X::size() == Y::size(), "Concatenation requires equisized shapes");
            static_assert(axis < X::size(), "Concatenation requires a valid axis");
            static_assert(
                all_true<(axis == Is || X::template get<Is>() == Y::template get<Is>())...>::value,
                "Concatenation requires compatible shapes and axis"
            );

            using type = fixed_shape<
                (axis == Is ? X::template get<Is>() + Y::template get<Is>() : X::template get<Is>())...>;
        };

        // Recursive reduction of concat_fixed_shape_impl over an arbitrary
        // number of fixed shapes (pairwise, right-to-left).
        template <std::size_t axis, class X, class Y, class... Rest>
        struct concat_fixed_shape;

        template <std::size_t axis, class X, class Y>
        struct concat_fixed_shape<axis, X, Y>
        {
            using type = typename concat_fixed_shape_impl<X, Y, axis, std::make_index_sequence<X::size()>>::type;
        };

        template <std::size_t axis, class X, class Y, class... Rest>
        struct concat_fixed_shape
        {
            using type = typename concat_fixed_shape<axis, X, typename concat_fixed_shape<axis, Y, Rest...>::type>::type;
        };

        template <std::size_t axis, class... Args>
        using concat_fixed_shape_t = typename concat_fixed_shape<axis, Args...>::type;

        // True when every expression in CT... has a compile-time fixed shape.
        template <class... CT>
        using all_fixed_shapes = detail::all_fixed<typename std::decay_t<CT>::shape_type...>;
+
        // Computes the runtime result shape of a concatenation: validates
        // that all inputs agree on every dimension except `axis`, then sums
        // the extents along `axis`.
        struct concat_shape_builder_t
        {
            template <class Shape, bool = detail::is_fixed<Shape>::value>
            struct concat_shape;

            template <class Shape>
            struct concat_shape<Shape, true>
            {
                // Convert `fixed_shape` to `static_shape` to allow runtime dimension calculation.
                using type = static_shape<typename Shape::value_type, Shape::size()>;
            };

            template <class Shape>
            struct concat_shape<Shape, false>
            {
                using type = Shape;
            };

            template <class... Args>
            static auto build(const std::tuple<Args...>& t, std::size_t axis)
            {
                using shape_type = promote_shape_t<
                    typename concat_shape<typename std::decay_t<Args>::shape_type>::type...>;
                using source_shape_type = decltype(std::get<0>(t).shape());
                // Start from the first input's shape ...
                shape_type new_shape = xtl::forward_sequence<shape_type, source_shape_type>(
                    std::get<0>(t).shape()
                );

                // ... check every input matches it on all axes but `axis` ...
                auto check_shape = [&axis, &new_shape](auto& arr)
                {
                    std::size_t s = new_shape.size();
                    bool res = s == arr.dimension();
                    for (std::size_t i = 0; i < s; ++i)
                    {
                        res = res && (i == axis || new_shape[i] == arr.shape(i));
                    }
                    if (!res)
                    {
                        throw_concatenate_error(new_shape, arr.shape());
                    }
                };
                for_each(check_shape, t);

                // ... and replace the extent at `axis` with the sum over all
                // inputs (the += / - dance keeps new_shape's value category).
                auto shape_at_axis = [&axis](std::size_t prev, auto& arr) -> std::size_t
                {
                    return prev + arr.shape()[axis];
                };
                new_shape[axis] += accumulate(shape_at_axis, std::size_t(0), t) - new_shape[axis];

                return new_shape;
            }
        };
+
+    }  // namespace detail
+
    /***************
     * concatenate *
     ***************/

    /**
     * @brief Concatenates xexpressions along \em axis.
     *
     * @param t \ref xtuple of xexpressions to concatenate
     * @param axis axis along which elements are concatenated
     * @returns xgenerator evaluating to concatenated elements
     *
     * @code{.cpp}
     * xt::xarray<double> a = {{1, 2, 3}};
     * xt::xarray<double> b = {{2, 3, 4}};
     * xt::xarray<double> c = xt::concatenate(xt::xtuple(a, b)); // => {{1, 2, 3},
     *                                                           //     {2, 3, 4}}
     * xt::xarray<double> d = xt::concatenate(xt::xtuple(a, b), 1); // => {{1, 2, 3, 2, 3, 4}}
     * @endcode
     */
    template <class... CT>
    inline auto concatenate(std::tuple<CT...>&& t, std::size_t axis = 0)
    {
        const auto shape = detail::concat_shape_builder_t::build(t, axis);
        return detail::make_xgenerator(detail::concatenate_impl<CT...>(std::move(t), axis), shape);
    }

    /**
     * @brief Concatenates xexpressions with compile-time fixed shapes along
     *        the template parameter \em axis; the result shape and shape
     *        compatibility are computed and checked at compile time.
     */
    template <std::size_t axis, class... CT, typename = std::enable_if_t<detail::all_fixed_shapes<CT...>::value>>
    inline auto concatenate(std::tuple<CT...>&& t)
    {
        using shape_type = detail::concat_fixed_shape_t<axis, typename std::decay_t<CT>::shape_type...>;
        return detail::make_xgenerator(detail::concatenate_impl<CT...>(std::move(t), axis), shape_type{});
    }
+
+    namespace detail
+    {
+        template <class T, std::size_t N>
+        inline std::array<T, N + 1> add_axis(std::array<T, N> arr, std::size_t axis, std::size_t value)
+        {
+            std::array<T, N + 1> temp;
+            std::copy(arr.begin(), arr.begin() + axis, temp.begin());
+            temp[axis] = value;
+            std::copy(arr.begin() + axis, arr.end(), temp.begin() + axis + 1);
+            return temp;
+        }
+
+        template <class T>
+        inline T add_axis(T arr, std::size_t axis, std::size_t value)
+        {
+            T temp(arr);
+            temp.insert(temp.begin() + std::ptrdiff_t(axis), value);
+            return temp;
+        }
+    }
+
    /**
     * @brief Stack xexpressions along \em axis.
     *        Stacking always creates a new dimension along which elements are stacked.
     *
     * @param t \ref xtuple of xexpressions to concatenate
     * @param axis axis along which elements are stacked
     * @returns xgenerator evaluating to stacked elements
     *
     * @code{.cpp}
     * xt::xarray<double> a = {1, 2, 3};
     * xt::xarray<double> b = {5, 6, 7};
     * xt::xarray<double> s = xt::stack(xt::xtuple(a, b)); // => {{1, 2, 3},
     *                                                     //     {5, 6, 7}}
     * xt::xarray<double> t = xt::stack(xt::xtuple(a, b), 1); // => {{1, 5},
     *                                                        //     {2, 6},
     *                                                        //     {3, 7}}
     * @endcode
     */
    template <class... CT>
    inline auto stack(std::tuple<CT...>&& t, std::size_t axis = 0)
    {
        using shape_type = promote_shape_t<typename std::decay_t<CT>::shape_type...>;
        using source_shape_type = decltype(std::get<0>(t).shape());
        // The result shape is the first input's shape with the number of
        // stacked expressions inserted as a new extent at `axis`.
        auto new_shape = detail::add_axis(
            xtl::forward_sequence<shape_type, source_shape_type>(std::get<0>(t).shape()),
            axis,
            sizeof...(CT)
        );
        return detail::make_xgenerator(detail::stack_impl<CT...>(std::move(t), axis), new_shape);
    }
+
    /**
     * @brief Stack xexpressions in sequence horizontally (column wise).
     * This is equivalent to concatenation along the second axis, except for 1-D
     * xexpressions where it concatenates along the first axis.
     *
     * @param t \ref xtuple of xexpressions to stack
     * @return xgenerator evaluating to stacked elements
     */
    template <class... CT>
    inline auto hstack(std::tuple<CT...>&& t)
    {
        // 1-D inputs concatenate along axis 0, higher-dimensional ones along
        // axis 1 (columns)
        auto dim = std::get<0>(t).dimension();
        std::size_t axis = dim > std::size_t(1) ? 1 : 0;
        return concatenate(std::move(t), axis);
    }
+
    namespace detail
    {
        // Computes the shape of vstack's result: a 1-D input of length N is
        // promoted to shape (number of inputs, N); otherwise the input shapes
        // are concatenated along the first axis.
        template <class S, class... CT>
        inline auto vstack_shape(std::tuple<CT...>& t, const S& shape)
        {
            using size_type = typename S::value_type;
            auto res = shape.size() == size_type(1)
                           ? S({sizeof...(CT), shape[0]})
                           : concat_shape_builder_t::build(std::move(t), size_type(0));
            return res;
        }

        // Overload for static 1-D shapes: the result is always the 2-D shape
        // (number of inputs, N).
        template <class T, class... CT>
        inline auto vstack_shape(const std::tuple<CT...>&, std::array<T, 1> shape)
        {
            std::array<T, 2> res = {sizeof...(CT), shape[0]};
            return res;
        }
    }
+
    /**
     * @brief Stack xexpressions in sequence vertically (row wise).
     * This is equivalent to concatenation along the first axis after
     * 1-D arrays of shape (N) have been reshaped to (1, N).
     *
     * @param t \ref xtuple of xexpressions to stack
     * @return xgenerator evaluating to stacked elements
     */
    template <class... CT>
    inline auto vstack(std::tuple<CT...>&& t)
    {
        using shape_type = promote_shape_t<typename std::decay_t<CT>::shape_type...>;
        using source_shape_type = decltype(std::get<0>(t).shape());
        // the result shape is derived from the first expression's shape
        auto new_shape = detail::vstack_shape(
            t,
            xtl::forward_sequence<shape_type, source_shape_type>(std::get<0>(t).shape())
        );
        return detail::make_xgenerator(detail::vstack_impl<CT...>(std::move(t), size_t(0)), new_shape);
    }
+
    namespace detail
    {

        // For each input expression, builds a generator repeating it along all
        // other dimensions; input I varies along dimension I of the common
        // shape {e.shape()[0]...}.
        template <std::size_t... I, class... E>
        inline auto meshgrid_impl(std::index_sequence<I...>, E&&... e) noexcept
        {
#if defined _MSC_VER
            // MSVC workaround: the shape must be spelled as a named std::array
            // instead of a braced initializer list.
            const std::array<std::size_t, sizeof...(E)> shape = {e.shape()[0]...};
            return std::make_tuple(
                detail::make_xgenerator(detail::repeat_impl<xclosure_t<E>>(std::forward<E>(e), I), shape)...
            );
#else
            return std::make_tuple(detail::make_xgenerator(
                detail::repeat_impl<xclosure_t<E>>(std::forward<E>(e), I),
                {e.shape()[0]...}
            )...);
#endif
        }
    }
+
    /**
     * @brief Return coordinate tensors from coordinate vectors.
     *        Make N-D coordinate tensor expressions for vectorized evaluations of N-D scalar/vector
     *        fields over N-D grids, given one-dimensional coordinate arrays x1, x2,..., xn.
     *
     * @param e 1-D coordinate xexpressions
     * @returns tuple of xgenerator expressions, one per input.
     */
    template <class... E>
    inline auto meshgrid(E&&... e) noexcept
    {
        return detail::meshgrid_impl(std::make_index_sequence<sizeof...(E)>(), std::forward<E>(e)...);
    }
+
+    namespace detail
+    {
        // Functor implementing diagonal(): maps an output index onto an index
        // into the source expression. The last output coordinate walks along
        // the diagonal of the (m_axis_1, m_axis_2) plane; the remaining
        // coordinates select the sub-array.
        template <class CT>
        class diagonal_fn
        {
        public:

            using xexpression_type = std::decay_t<CT>;
            using value_type = typename xexpression_type::value_type;

            template <class CTA>
            diagonal_fn(CTA&& source, int offset, std::size_t axis_1, std::size_t axis_2)
                : m_source(std::forward<CTA>(source))
                , m_offset(offset)
                , m_axis_1(axis_1)
                , m_axis_2(axis_2)
            {
            }

            template <class It>
            inline value_type operator()(It begin, It) const
            {
                xindex idx(m_source.shape().size());

                // fill all non-diagonal axes from the leading output
                // coordinates; afterwards `begin` points at the last output
                // coordinate, i.e. the position along the diagonal
                for (std::size_t i = 0; i < idx.size(); i++)
                {
                    if (i != m_axis_1 && i != m_axis_2)
                    {
                        idx[i] = static_cast<std::size_t>(*begin++);
                    }
                }
                using it_vtype = typename std::iterator_traits<It>::value_type;
                it_vtype uoffset = static_cast<it_vtype>(m_offset);
                if (m_offset >= 0)
                {
                    // positive offsets shift the second diagonal axis
                    idx[m_axis_1] = static_cast<std::size_t>(*(begin));
                    idx[m_axis_2] = static_cast<std::size_t>(*(begin) + uoffset);
                }
                else
                {
                    // negative offsets shift the first diagonal axis
                    idx[m_axis_1] = static_cast<std::size_t>(*(begin) -uoffset);
                    idx[m_axis_2] = static_cast<std::size_t>(*(begin));
                }
                return m_source[idx];
            }

        private:

            CT m_source;
            const int m_offset;
            const std::size_t m_axis_1;
            const std::size_t m_axis_2;
        };
+
        // Functor implementing diag(): given a 2-D output index (row, col), it
        // returns the source value when (row, col) lies on the k-th diagonal
        // and value_type(0) otherwise.
        template <class CT>
        class diag_fn
        {
        public:

            using xexpression_type = std::decay_t<CT>;
            using value_type = typename xexpression_type::value_type;

            template <class CTA>
            diag_fn(CTA&& source, int k)
                : m_source(std::forward<CTA>(source))
                , m_k(k)
            {
            }

            // `begin` points at the row index, `begin + 1` at the column index.
            template <class It>
            inline value_type operator()(It begin, It) const
            {
                using it_vtype = typename std::iterator_traits<It>::value_type;
                it_vtype umk = static_cast<it_vtype>(m_k);
                if (m_k > 0)
                {
                    // above the main diagonal: the source is indexed by the row
                    return *begin + umk == *(begin + 1) ? m_source(*begin) : value_type(0);
                }
                else
                {
                    // on/below the main diagonal: the source is indexed by the
                    // column (row + k)
                    return *begin + umk == *(begin + 1) ? m_source(*begin + umk) : value_type(0);
                }
            }

        private:

            CT m_source;
            const int m_k;
        };
+
+        template <class CT, class Comp>
+        class trilu_fn
+        {
+        public:
+
+            using xexpression_type = std::decay_t<CT>;
+            using value_type = typename xexpression_type::value_type;
+            using signed_idx_type = long int;
+
+            template <class CTA>
+            trilu_fn(CTA&& source, int k, Comp comp)
+                : m_source(std::forward<CTA>(source))
+                , m_k(k)
+                , m_comp(comp)
+            {
+            }
+
+            template <class It>
+            inline value_type operator()(It begin, It end) const
+            {
+                // have to cast to signed int otherwise -1 can lead to overflow
+                return m_comp(signed_idx_type(*begin) + m_k, signed_idx_type(*(begin + 1)))
+                           ? m_source.element(begin, end)
+                           : value_type(0);
+            }
+
+        private:
+
+            CT m_source;
+            const signed_idx_type m_k;
+            const Comp m_comp;
+        };
+    }
+
    namespace detail
    {
        // meta-function returning the shape type for a diagonal
        template <class ST, class... S>
        struct diagonal_shape_type
        {
            using type = ST;
        };

        // static shapes lose exactly one dimension
        template <class I, std::size_t L>
        struct diagonal_shape_type<std::array<I, L>>
        {
            using type = std::array<I, L - 1>;
        };
    }
+
+    /**
+     * @brief Returns the elements on the diagonal of arr
+     * If arr has more than two dimensions, then the axes specified by
+     * axis_1 and axis_2 are used to determine the 2-D sub-array whose
+     * diagonal is returned. The shape of the resulting array can be
+     * determined by removing axis1 and axis2 and appending an index
+     * to the right equal to the size of the resulting diagonals.
+     *
+     * @param arr the input array
+     * @param offset offset of the diagonal from the main diagonal. Can
+     *               be positive or negative.
+     * @param axis_1 Axis to be used as the first axis of the 2-D sub-arrays
+     *               from which the diagonals should be taken.
+     * @param axis_2 Axis to be used as the second axis of the 2-D sub-arrays
+     *               from which the diagonals should be taken.
+     * @returns xexpression with values of the diagonal
+     *
+     * @code{.cpp}
+     * xt::xarray<double> a = {{1, 2, 3},
+     *                         {4, 5, 6}
+     *                         {7, 8, 9}};
+     * auto b = xt::diagonal(a); // => {1, 5, 9}
+     * @endcode
+     */
+    template <class E>
+    inline auto diagonal(E&& arr, int offset = 0, std::size_t axis_1 = 0, std::size_t axis_2 = 1)
+    {
+        using CT = xclosure_t<E>;
+        using shape_type = typename detail::diagonal_shape_type<typename std::decay_t<E>::shape_type>::type;
+
+        auto shape = arr.shape();
+        auto dimension = arr.dimension();
+
+        // The following shape calculation code is an almost verbatim adaptation of NumPy:
+        // https://github.com/numpy/numpy/blob/2aabeafb97bea4e1bfa29d946fbf31e1104e7ae0/numpy/core/src/multiarray/item_selection.c#L1799
+        auto ret_shape = xtl::make_sequence<shape_type>(dimension - 1, 0);
+        int dim_1 = static_cast<int>(shape[axis_1]);
+        int dim_2 = static_cast<int>(shape[axis_2]);
+
+        offset >= 0 ? dim_2 -= offset : dim_1 += offset;
+
+        auto diag_size = std::size_t(dim_2 < dim_1 ? dim_2 : dim_1);
+
+        std::size_t i = 0;
+        for (std::size_t idim = 0; idim < dimension; ++idim)
+        {
+            if (idim != axis_1 && idim != axis_2)
+            {
+                ret_shape[i++] = shape[idim];
+            }
+        }
+
+        ret_shape.back() = diag_size;
+
+        return detail::make_xgenerator(
+            detail::fn_impl<detail::diagonal_fn<CT>>(
+                detail::diagonal_fn<CT>(std::forward<E>(arr), offset, axis_1, axis_2)
+            ),
+            ret_shape
+        );
+    }
+
+    /**
+     * @brief xexpression with values of arr on the diagonal, zeroes otherwise
+     *
+     * @param arr the 1D input array of length n
+     * @param k the offset of the considered diagonal
+     * @returns xexpression function with shape n x n and arr on the diagonal
+     *
+     * @code{.cpp}
+     * xt::xarray<double> a = {1, 5, 9};
+     * auto b = xt::diag(a); // => {{1, 0, 0},
+     *                       //     {0, 5, 0},
+     *                       //     {0, 0, 9}}
+     * @endcode
+     */
+    template <class E>
+    inline auto diag(E&& arr, int k = 0)
+    {
+        using CT = xclosure_t<E>;
+        std::size_t sk = std::size_t(std::abs(k));
+        std::size_t s = arr.shape()[0] + sk;
+        return detail::make_xgenerator(
+            detail::fn_impl<detail::diag_fn<CT>>(detail::diag_fn<CT>(std::forward<E>(arr), k)),
+            {s, s}
+        );
+    }
+
+    /**
+     * @brief Extract lower triangular matrix from xexpression. The parameter k selects the
+     *        offset of the diagonal.
+     *
+     * @param arr the input array
+     * @param k the diagonal above which to zero elements. 0 (default) selects the main diagonal,
+     *          k < 0 is below the main diagonal, k > 0 above.
+     * @returns xexpression containing lower triangle from arr, 0 otherwise
+     */
+    template <class E>
+    inline auto tril(E&& arr, int k = 0)
+    {
+        using CT = xclosure_t<E>;
+        auto shape = arr.shape();
+        return detail::make_xgenerator(
+            detail::fn_impl<detail::trilu_fn<CT, std::greater_equal<long int>>>(
+                detail::trilu_fn<CT, std::greater_equal<long int>>(
+                    std::forward<E>(arr),
+                    k,
+                    std::greater_equal<long int>()
+                )
+            ),
+            shape
+        );
+    }
+
    /**
     * @brief Extract upper triangular matrix from xexpression. The parameter k selects the
     *        offset of the diagonal.
     *
     * @param arr the input array
     * @param k the diagonal below which to zero elements. 0 (default) selects the main diagonal,
     *          k < 0 is below the main diagonal, k > 0 above.
     * @returns xexpression containing upper triangle from arr, 0 otherwise
     */
    template <class E>
    inline auto triu(E&& arr, int k = 0)
    {
        using CT = xclosure_t<E>;
        auto shape = arr.shape();
        return detail::make_xgenerator(
            detail::fn_impl<detail::trilu_fn<CT, std::less_equal<long int>>>(
                detail::trilu_fn<CT, std::less_equal<long int>>(std::forward<E>(arr), k, std::less_equal<long int>())
            ),
            shape
        );
    }
+}
+#endif

+ 686 - 0
3rd/numpy/include/xtensor/xchunked_array.hpp

@@ -0,0 +1,686 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_CHUNKED_ARRAY_HPP
+#define XTENSOR_CHUNKED_ARRAY_HPP
+
+#include <array>
+#include <vector>
+
+#include "xarray.hpp"
+#include "xchunked_assign.hpp"
+
+namespace xt
+{
+    /**
+     * @defgroup xt_xchunked_array
+     *
+     * Chunked array container.
+     * Defined in ``xtensor/xchunked_array.hpp``.
+     */
+
+    /******************************
+     * xchunked_array declaration *
+     ******************************/
+
    template <class chunk_storage>
    class xchunked_array;

    // Inner types for the container machinery: references and storage come
    // from the chunk type (the inner array); sizes are dynamic.
    template <class chunk_storage>
    struct xcontainer_inner_types<xchunked_array<chunk_storage>>
    {
        using chunk_type = typename chunk_storage::value_type;
        using const_reference = typename chunk_type::const_reference;
        using reference = typename chunk_type::reference;
        using size_type = std::size_t;
        using storage_type = chunk_type;
        using temporary_type = xchunked_array<chunk_storage>;
    };

    // Iteration support: chunked arrays are traversed with index-based
    // steppers (xindexed_stepper).
    template <class chunk_storage>
    struct xiterable_inner_types<xchunked_array<chunk_storage>>
    {
        using chunk_type = typename chunk_storage::value_type;
        using inner_shape_type = typename chunk_type::shape_type;
        using const_stepper = xindexed_stepper<xchunked_array<chunk_storage>, true>;
        using stepper = xindexed_stepper<xchunked_array<chunk_storage>, false>;
    };
+
    /**
     * Chunked array container: the data is split into chunks (inner arrays)
     * held in a `chunk_storage` container. Element access first locates the
     * owning chunk, then the position inside that chunk.
     */
    template <class chunk_storage>
    class xchunked_array : public xaccessible<xchunked_array<chunk_storage>>,
                           public xiterable<xchunked_array<chunk_storage>>,
                           public xchunked_semantic<xchunked_array<chunk_storage>>
    {
    public:

        using chunk_storage_type = chunk_storage;
        using chunk_type = typename chunk_storage::value_type;
        using grid_shape_type = typename chunk_storage::shape_type;
        using const_reference = typename chunk_type::const_reference;
        using reference = typename chunk_type::reference;
        using self_type = xchunked_array<chunk_storage>;
        using semantic_base = xchunked_semantic<self_type>;
        using iterable_base = xconst_iterable<self_type>;
        using const_stepper = typename iterable_base::const_stepper;
        using stepper = typename iterable_base::stepper;
        using inner_types = xcontainer_inner_types<self_type>;
        using size_type = typename inner_types::size_type;
        using storage_type = typename inner_types::storage_type;
        using value_type = typename storage_type::value_type;
        using pointer = value_type*;
        using const_pointer = const value_type*;
        using difference_type = std::ptrdiff_t;
        using shape_type = typename chunk_type::shape_type;
        using temporary_type = typename inner_types::temporary_type;
        using bool_load_type = xt::bool_load_type<value_type>;
        // chunked storage is neither layout-uniform nor contiguous
        static constexpr layout_type static_layout = layout_type::dynamic;
        static constexpr bool contiguous_layout = false;
        using chunk_iterator = xchunk_iterator<self_type>;
        using const_chunk_iterator = xchunk_iterator<const self_type>;

        // Builds an array of the given shape split into chunks of shape
        // `chunk_shape`, using the provided chunk storage.
        template <class S>
        xchunked_array(
            chunk_storage_type&& chunks,
            S&& shape,
            S&& chunk_shape,
            layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT
        );
        ~xchunked_array() = default;

        xchunked_array(const xchunked_array&) = default;
        xchunked_array& operator=(const xchunked_array&) = default;

        xchunked_array(xchunked_array&&) = default;
        xchunked_array& operator=(xchunked_array&&) = default;

        // Builds from an expression; the chunk shape is deduced from `e`
        // (its chunk shape when `e` is itself chunked, its full shape otherwise).
        template <class E>
        xchunked_array(
            const xexpression<E>& e,
            chunk_storage_type&& chunks,
            layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT
        );

        // Builds from an expression with an explicit chunk shape.
        template <class E, class S>
        xchunked_array(
            const xexpression<E>& e,
            chunk_storage_type&& chunks,
            S&& chunk_shape,
            layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT
        );

        template <class E>
        xchunked_array& operator=(const xexpression<E>& e);

        // Shape and layout queries.
        size_type dimension() const noexcept;
        const shape_type& shape() const noexcept;
        layout_type layout() const noexcept;
        bool is_contiguous() const noexcept;

        // Element access: indexes are split into (chunk index, index in chunk).
        template <class... Idxs>
        reference operator()(Idxs... idxs);

        template <class... Idxs>
        const_reference operator()(Idxs... idxs) const;

        template <class It>
        reference element(It first, It last);

        template <class It>
        const_reference element(It first, It last) const;

        // Broadcasting / stepper interface used by the expression system.
        template <class S>
        bool broadcast_shape(S& s, bool reuse_cache = false) const;

        template <class S>
        bool has_linear_assign(const S& strides) const noexcept;

        template <class S>
        stepper stepper_begin(const S& shape) noexcept;
        template <class S>
        stepper stepper_end(const S& shape, layout_type) noexcept;

        template <class S>
        const_stepper stepper_begin(const S& shape) const noexcept;
        template <class S>
        const_stepper stepper_end(const S& shape, layout_type) const noexcept;

        // Chunk-level API: shape of a single chunk, the chunk grid, and
        // iteration over chunks.
        const shape_type& chunk_shape() const noexcept;
        size_type grid_size() const noexcept;
        const grid_shape_type& grid_shape() const noexcept;

        chunk_storage_type& chunks();
        const chunk_storage_type& chunks() const;

        chunk_iterator chunk_begin();
        chunk_iterator chunk_end();

        const_chunk_iterator chunk_begin() const;
        const_chunk_iterator chunk_end() const;
        const_chunk_iterator chunk_cbegin() const;
        const_chunk_iterator chunk_cend() const;

    private:

        // pair of (chunk indexes, indexes within the chunk)
        template <class... Idxs>
        using indexes_type = std::
            pair<std::array<std::size_t, sizeof...(Idxs)>, std::array<std::size_t, sizeof...(Idxs)>>;

        template <class... Idxs>
        using chunk_indexes_type = std::array<std::pair<std::size_t, std::size_t>, sizeof...(Idxs)>;

        template <std::size_t N>
        using static_indexes_type = std::pair<std::array<std::size_t, N>, std::array<std::size_t, N>>;

        using dynamic_indexes_type = std::pair<std::vector<std::size_t>, std::vector<std::size_t>>;

        template <class S1, class S2>
        void resize(S1&& shape, S2&& chunk_shape, layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT);

        // splits flat indexes into (chunk indexes, indexes within the chunk)
        template <class... Idxs>
        indexes_type<Idxs...> get_indexes(Idxs... idxs) const;

        template <class Idx>
        std::pair<std::size_t, std::size_t> get_chunk_indexes_in_dimension(std::size_t dim, Idx idx) const;

        template <std::size_t... dims, class... Idxs>
        chunk_indexes_type<Idxs...> get_chunk_indexes(std::index_sequence<dims...>, Idxs... idxs) const;

        template <class T, std::size_t N>
        static_indexes_type<N> unpack(const std::array<T, N>& arr) const;

        template <class It>
        dynamic_indexes_type get_indexes_dynamic(It first, It last) const;

        shape_type m_shape;
        shape_type m_chunk_shape;
        chunk_storage_type m_chunks;
    };
+
    // Returns true when E is a chunked expression, detected by the presence
    // of a chunk_shape() member (see detail::chunk_helper).
    template <class E>
    constexpr bool is_chunked(const xexpression<E>& e);

    template <class E>
    constexpr bool is_chunked();

    /**
     * Creates an in-memory chunked array.
     *
     * This function returns an uninitialized ``xt::xchunked_array<xt::xarray<T>>``.
     *
     * @ingroup xt_xchunked_array
     *
     * @tparam T The type of the elements (e.g. double)
     * @tparam L The layout_type of the array
     *
     * @param shape The shape of the array
     * @param chunk_shape The shape of a chunk
     * @param chunk_memory_layout The layout of each chunk (default: XTENSOR_DEFAULT_LAYOUT)
     *
     * @return returns a ``xt::xchunked_array<xt::xarray<T>>`` with the given shape, chunk shape and memory
     * layout.
     */
    template <class T, layout_type L = XTENSOR_DEFAULT_LAYOUT, class S>
    xchunked_array<xarray<xarray<T>>>
    chunked_array(S&& shape, S&& chunk_shape, layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT);

    /**
     * Creates an in-memory chunked array.
     *
     * Overload accepting the shape and chunk shape as initializer lists.
     *
     * @ingroup xt_xchunked_array
     */
    template <class T, layout_type L = XTENSOR_DEFAULT_LAYOUT, class S>
    xchunked_array<xarray<xarray<T>>> chunked_array(
        std::initializer_list<S> shape,
        std::initializer_list<S> chunk_shape,
        layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT
    );

    /**
     * Creates an in-memory chunked array.
     *
     * This function returns a ``xt::xchunked_array<xt::xarray<T>>`` initialized from an expression.
     *
     * @ingroup xt_xchunked_array
     *
     * @tparam L The layout_type of the array
     *
     * @param e The expression to initialize the chunked array from
     * @param chunk_shape The shape of a chunk
     * @param chunk_memory_layout The layout of each chunk (default: XTENSOR_DEFAULT_LAYOUT)
     *
     * @return returns a ``xt::xchunked_array<xt::xarray<T>>`` from the given expression, with the given chunk
     * shape and memory layout.
     */
    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class E, class S>
    xchunked_array<xarray<xarray<typename E::value_type>>>
    chunked_array(const xexpression<E>& e, S&& chunk_shape, layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT);

    /**
     * Creates an in-memory chunked array.
     *
     * This function returns a ``xt::xchunked_array<xt::xarray<T>>`` initialized from an expression.
     *
     * @ingroup xt_xchunked_array
     *
     * @tparam L The layout_type of the array
     *
     * @param e The expression to initialize the chunked array from
     * @param chunk_memory_layout The layout of each chunk (default: XTENSOR_DEFAULT_LAYOUT)
     *
     * @return returns a ``xt::xchunked_array<xt::xarray<T>>`` from the given expression, with the
     * expression's chunk shape and the given memory layout.
     */
    template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class E>
    xchunked_array<xarray<xarray<typename E::value_type>>>
    chunked_array(const xexpression<E>& e, layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT);
+
+    /*******************************
+     * chunk_helper implementation *
+     *******************************/
+
    namespace detail
    {
        // Workaround for VS2015
        template <class E>
        using try_chunk_shape = decltype(std::declval<E>().chunk_shape());

        // Primary template: E is not chunked. The "chunk shape" of a plain
        // expression is its full shape, and resizing must also resize each
        // individual chunk.
        template <class E, template <class> class OP, class = void>
        struct chunk_helper_impl
        {
            using is_chunked = std::false_type;

            static const auto& chunk_shape(const xexpression<E>& e)
            {
                return e.derived_cast().shape();
            }

            template <class S1, class S2>
            static void
            resize(E& chunks, const S1& container_shape, const S2& chunk_shape, layout_type chunk_memory_layout)
            {
                chunks.resize(container_shape);
                for (auto& c : chunks)
                {
                    c.resize(chunk_shape, chunk_memory_layout);
                }
            }
        };

        // Specialization for chunked expressions, detected by the presence of
        // a chunk_shape() member; the chunks manage their own shapes.
        template <class E, template <class> class OP>
        struct chunk_helper_impl<E, OP, void_t<OP<E>>>
        {
            using is_chunked = std::true_type;

            static const auto& chunk_shape(const xexpression<E>& e)
            {
                return e.derived_cast().chunk_shape();
            }

            template <class S1, class S2>
            static void
            resize(E& chunks, const S1& container_shape, const S2& /*chunk_shape*/, layout_type /*chunk_memory_layout*/)
            {
                chunks.resize(container_shape);
            }
        };

        template <class E>
        using chunk_helper = chunk_helper_impl<E, try_chunk_shape>;
    }
+
    template <class E>
    constexpr bool is_chunked(const xexpression<E>&)
    {
        return is_chunked<E>();
    }

    template <class E>
    constexpr bool is_chunked()
    {
        // an expression is chunked when it exposes a chunk_shape() member
        using return_type = typename detail::chunk_helper<E>::is_chunked;
        return return_type::value;
    }

    // NOTE(review): the declared return type is xchunked_array<xarray<xarray<T>>>
    // (default inner layout) while the constructed storage is xarray<xarray<T, L>>;
    // these coincide only when L is XTENSOR_DEFAULT_LAYOUT — confirm intent for
    // other layouts.
    template <class T, layout_type L, class S>
    inline xchunked_array<xarray<xarray<T>>>
    chunked_array(S&& shape, S&& chunk_shape, layout_type chunk_memory_layout)
    {
        using chunk_storage = xarray<xarray<T, L>>;
        return xchunked_array<chunk_storage>(
            chunk_storage(),
            std::forward<S>(shape),
            std::forward<S>(chunk_shape),
            chunk_memory_layout
        );
    }

    // Initializer-list overload: materializes the lists as std::vector shapes
    // and forwards to the generic overload.
    template <class T, layout_type L, class S>
    xchunked_array<xarray<xarray<T>>>
    chunked_array(std::initializer_list<S> shape, std::initializer_list<S> chunk_shape, layout_type chunk_memory_layout)
    {
        using sh_type = std::vector<std::size_t>;
        auto sh = xtl::forward_sequence<sh_type, std::initializer_list<S>>(shape);
        auto ch_sh = xtl::forward_sequence<sh_type, std::initializer_list<S>>(chunk_shape);
        return chunked_array<T, L, sh_type>(std::move(sh), std::move(ch_sh), chunk_memory_layout);
    }

    // From-expression overload with an explicit chunk shape.
    template <layout_type L, class E, class S>
    inline xchunked_array<xarray<xarray<typename E::value_type>>>
    chunked_array(const xexpression<E>& e, S&& chunk_shape, layout_type chunk_memory_layout)
    {
        using chunk_storage = xarray<xarray<typename E::value_type, L>>;
        return xchunked_array<chunk_storage>(e, chunk_storage(), std::forward<S>(chunk_shape), chunk_memory_layout);
    }

    // From-expression overload: the chunk shape is deduced from e.
    template <layout_type L, class E>
    inline xchunked_array<xarray<xarray<typename E::value_type>>>
    chunked_array(const xexpression<E>& e, layout_type chunk_memory_layout)
    {
        using chunk_storage = xarray<xarray<typename E::value_type, L>>;
        return xchunked_array<chunk_storage>(e, chunk_storage(), chunk_memory_layout);
    }
+
+    /*********************************
+     * xchunked_array implementation *
+     *********************************/
+
    template <class CS>
    template <class S>
    inline xchunked_array<CS>::xchunked_array(CS&& chunks, S&& shape, S&& chunk_shape, layout_type chunk_memory_layout)
        : m_chunks(std::move(chunks))
    {
        // allocates the chunk grid and the chunks themselves
        resize(std::forward<S>(shape), std::forward<S>(chunk_shape), chunk_memory_layout);
    }

    // Delegates with the chunk shape deduced from e: its chunk shape when e is
    // chunked, its full shape otherwise.
    template <class CS>
    template <class E>
    inline xchunked_array<CS>::xchunked_array(const xexpression<E>& e, CS&& chunks, layout_type chunk_memory_layout)
        : xchunked_array(e, std::move(chunks), detail::chunk_helper<E>::chunk_shape(e), chunk_memory_layout)
    {
    }

    template <class CS>
    template <class E, class S>
    inline xchunked_array<CS>::xchunked_array(
        const xexpression<E>& e,
        CS&& chunks,
        S&& chunk_shape,
        layout_type chunk_memory_layout
    )
        : m_chunks(std::move(chunks))
    {
        // size the storage first, then copy the expression's values in
        resize(e.derived_cast().shape(), std::forward<S>(chunk_shape), chunk_memory_layout);
        semantic_base::assign_xexpression(e);
    }

    template <class CS>
    template <class E>
    inline auto xchunked_array<CS>::operator=(const xexpression<E>& e) -> self_type&
    {
        return semantic_base::operator=(e);
    }
+
    template <class CS>
    inline auto xchunked_array<CS>::dimension() const noexcept -> size_type
    {
        return m_shape.size();
    }

    template <class CS>
    inline auto xchunked_array<CS>::shape() const noexcept -> const shape_type&
    {
        return m_shape;
    }

    template <class CS>
    inline auto xchunked_array<CS>::layout() const noexcept -> layout_type
    {
        return static_layout;
    }

    template <class CS>
    inline bool xchunked_array<CS>::is_contiguous() const noexcept
    {
        // data is spread over independent chunks, never contiguous
        return false;
    }

    // Element access: locate the chunk from the first index set, then the
    // element inside it from the second.
    template <class CS>
    template <class... Idxs>
    inline auto xchunked_array<CS>::operator()(Idxs... idxs) -> reference
    {
        auto ii = get_indexes(idxs...);
        auto& chunk = m_chunks.element(ii.first.cbegin(), ii.first.cend());
        return chunk.element(ii.second.cbegin(), ii.second.cend());
    }

    template <class CS>
    template <class... Idxs>
    inline auto xchunked_array<CS>::operator()(Idxs... idxs) const -> const_reference
    {
        auto ii = get_indexes(idxs...);
        auto& chunk = m_chunks.element(ii.first.cbegin(), ii.first.cend());
        return chunk.element(ii.second.cbegin(), ii.second.cend());
    }

    template <class CS>
    template <class It>
    inline auto xchunked_array<CS>::element(It first, It last) -> reference
    {
        auto ii = get_indexes_dynamic(first, last);
        auto& chunk = m_chunks.element(ii.first.begin(), ii.first.end());
        return chunk.element(ii.second.begin(), ii.second.end());
    }

    template <class CS>
    template <class It>
    inline auto xchunked_array<CS>::element(It first, It last) const -> const_reference
    {
        auto ii = get_indexes_dynamic(first, last);
        auto& chunk = m_chunks.element(ii.first.begin(), ii.first.end());
        return chunk.element(ii.second.begin(), ii.second.end());
    }
+
+    // Broadcasts this array's shape into s; the bool reuse hint is ignored.
+    template <class CS>
+    template <class S>
+    inline bool xchunked_array<CS>::broadcast_shape(S& s, bool) const
+    {
+        return xt::broadcast_shape(shape(), s);
+    }
+
+    // Chunked storage never supports linear (flat) assignment.
+    template <class CS>
+    template <class S>
+    inline bool xchunked_array<CS>::has_linear_assign(const S&) const noexcept
+    {
+        return false;
+    }
+
+    // Begin stepper; offset is the number of extra leading dimensions in the
+    // broadcast target shape relative to this array.
+    template <class CS>
+    template <class S>
+    inline auto xchunked_array<CS>::stepper_begin(const S& shape) noexcept -> stepper
+    {
+        size_type offset = shape.size() - this->dimension();
+        return stepper(this, offset);
+    }
+
+    // End stepper (third argument `true` marks the end position).
+    template <class CS>
+    template <class S>
+    inline auto xchunked_array<CS>::stepper_end(const S& shape, layout_type) noexcept -> stepper
+    {
+        size_type offset = shape.size() - this->dimension();
+        return stepper(this, offset, true);
+    }
+
+    // Const begin stepper; see stepper_begin above.
+    template <class CS>
+    template <class S>
+    inline auto xchunked_array<CS>::stepper_begin(const S& shape) const noexcept -> const_stepper
+    {
+        size_type offset = shape.size() - this->dimension();
+        return const_stepper(this, offset);
+    }
+
+    // Const end stepper; see stepper_end above.
+    template <class CS>
+    template <class S>
+    inline auto xchunked_array<CS>::stepper_end(const S& shape, layout_type) const noexcept -> const_stepper
+    {
+        size_type offset = shape.size() - this->dimension();
+        return const_stepper(this, offset, true);
+    }
+
+    // Returns the nominal shape of a single chunk (edge chunks may hold fewer valid elements).
+    template <class CS>
+    inline auto xchunked_array<CS>::chunk_shape() const noexcept -> const shape_type&
+    {
+        return m_chunk_shape;
+    }
+
+    // Returns the total number of chunks in the grid.
+    template <class CS>
+    inline auto xchunked_array<CS>::grid_size() const noexcept -> size_type
+    {
+        return m_chunks.size();
+    }
+
+    // Returns the shape of the chunk grid (number of chunks per dimension).
+    template <class CS>
+    inline auto xchunked_array<CS>::grid_shape() const noexcept -> const grid_shape_type&
+    {
+        return m_chunks.shape();
+    }
+
+    // Direct access to the underlying chunk storage.
+    template <class CS>
+    inline auto xchunked_array<CS>::chunks() -> chunk_storage_type&
+    {
+        return m_chunks;
+    }
+
+    // Const access to the underlying chunk storage.
+    template <class CS>
+    inline auto xchunked_array<CS>::chunks() const -> const chunk_storage_type&
+    {
+        return m_chunks;
+    }
+
+    // Iterator over chunks, starting at chunk index (0, ..., 0) / linear index 0.
+    template <class CS>
+    inline auto xchunked_array<CS>::chunk_begin() -> chunk_iterator
+    {
+        shape_type chunk_index(m_shape.size(), size_type(0));
+        return chunk_iterator(*this, std::move(chunk_index), 0u);
+    }
+
+    // Past-the-end chunk iterator: chunk index = grid shape, linear index = grid_size().
+    template <class CS>
+    inline auto xchunked_array<CS>::chunk_end() -> chunk_iterator
+    {
+        shape_type sh = xtl::forward_sequence<shape_type, const grid_shape_type>(grid_shape());
+        return chunk_iterator(*this, std::move(sh), grid_size());
+    }
+
+    // Const chunk iterator to the first chunk; see chunk_begin above.
+    template <class CS>
+    inline auto xchunked_array<CS>::chunk_begin() const -> const_chunk_iterator
+    {
+        shape_type chunk_index(m_shape.size(), size_type(0));
+        return const_chunk_iterator(*this, std::move(chunk_index), 0u);
+    }
+
+    // Const past-the-end chunk iterator; see chunk_end above.
+    template <class CS>
+    inline auto xchunked_array<CS>::chunk_end() const -> const_chunk_iterator
+    {
+        shape_type sh = xtl::forward_sequence<shape_type, const grid_shape_type>(grid_shape());
+        return const_chunk_iterator(*this, std::move(sh), grid_size());
+    }
+
+    // Explicitly-const alias of chunk_begin().
+    template <class CS>
+    inline auto xchunked_array<CS>::chunk_cbegin() const -> const_chunk_iterator
+    {
+        return chunk_begin();
+    }
+
+    // Explicitly-const alias of chunk_end().
+    template <class CS>
+    inline auto xchunked_array<CS>::chunk_cend() const -> const_chunk_iterator
+    {
+        return chunk_end();
+    }
+
+    // Resizes the chunked array.
+    // @param shape               new overall shape
+    // @param chunk_shape         new nominal shape of each chunk
+    // @param chunk_memory_layout layout used to allocate the individual chunks
+    template <class CS>
+    template <class S1, class S2>
+    inline void xchunked_array<CS>::resize(S1&& shape, S2&& chunk_shape, layout_type chunk_memory_layout)
+    {
+        // compute chunk number in each dimension (shape_of_chunks)
+        // via ceiling division: a partially-filled edge chunk counts as one.
+        std::vector<std::size_t> shape_of_chunks(shape.size());
+        std::transform(
+            shape.cbegin(),
+            shape.cend(),
+            chunk_shape.cbegin(),
+            shape_of_chunks.begin(),
+            [](auto s, auto cs)
+            {
+                std::size_t cn = s / cs;
+                if (s % cs > 0)
+                {
+                    cn += std::size_t(1);  // edge_chunk
+                }
+                return cn;
+            }
+        );
+
+        detail::chunk_helper<CS>::resize(m_chunks, shape_of_chunks, chunk_shape, chunk_memory_layout);
+
+        // Store the new shapes only after the chunk storage has been resized.
+        m_shape = xtl::forward_sequence<shape_type, S1>(shape);
+        m_chunk_shape = xtl::forward_sequence<shape_type, S2>(chunk_shape);
+    }
+
+    // Splits a compile-time index pack into a pair of arrays:
+    // (per-dimension chunk indices, per-dimension indices inside the chunk).
+    template <class CS>
+    template <class... Idxs>
+    inline auto xchunked_array<CS>::get_indexes(Idxs... idxs) const -> indexes_type<Idxs...>
+    {
+        auto chunk_indexes_packed = get_chunk_indexes(std::make_index_sequence<sizeof...(Idxs)>(), idxs...);
+        return unpack(chunk_indexes_packed);
+    }
+
+    // For one dimension, maps a global index to (chunk index, index within chunk):
+    // idx / chunk_extent selects the chunk, the remainder is the in-chunk offset.
+    template <class CS>
+    template <class Idx>
+    inline std::pair<std::size_t, std::size_t>
+    xchunked_array<CS>::get_chunk_indexes_in_dimension(std::size_t dim, Idx idx) const
+    {
+        std::size_t index_of_chunk = static_cast<size_t>(idx) / m_chunk_shape[dim];
+        std::size_t index_in_chunk = static_cast<size_t>(idx) - index_of_chunk * m_chunk_shape[dim];
+        return std::make_pair(index_of_chunk, index_in_chunk);
+    }
+
+    // Expands the index pack dimension-by-dimension into an array of
+    // (chunk index, in-chunk index) pairs, one entry per dimension.
+    template <class CS>
+    template <std::size_t... dims, class... Idxs>
+    inline auto xchunked_array<CS>::get_chunk_indexes(std::index_sequence<dims...>, Idxs... idxs) const
+        -> chunk_indexes_type<Idxs...>
+    {
+        chunk_indexes_type<Idxs...> chunk_indexes = {{get_chunk_indexes_in_dimension(dims, idxs)...}};
+        return chunk_indexes;
+    }
+
+    // Transposes an array of pairs into a pair of arrays:
+    // arr0 collects the chunk indices, arr1 the in-chunk indices.
+    template <class CS>
+    template <class T, std::size_t N>
+    inline auto xchunked_array<CS>::unpack(const std::array<T, N>& arr) const -> static_indexes_type<N>
+    {
+        std::array<std::size_t, N> arr0;
+        std::array<std::size_t, N> arr1;
+        for (std::size_t i = 0; i < N; ++i)
+        {
+            arr0[i] = std::get<0>(arr[i]);
+            arr1[i] = std::get<1>(arr[i]);
+        }
+        return std::make_pair(arr0, arr1);
+    }
+
+    // Runtime-dimension counterpart of get_indexes: consumes the iterator
+    // range [first, last) and returns (chunk indices, in-chunk indices) as vectors.
+    template <class CS>
+    template <class It>
+    inline auto xchunked_array<CS>::get_indexes_dynamic(It first, It last) const -> dynamic_indexes_type
+    {
+        auto size = static_cast<std::size_t>(std::distance(first, last));
+        std::vector<std::size_t> indexes_of_chunk(size);
+        std::vector<std::size_t> indexes_in_chunk(size);
+        for (std::size_t dim = 0; dim < size; ++dim)
+        {
+            auto chunk_index = get_chunk_indexes_in_dimension(dim, *first++);
+            indexes_of_chunk[dim] = chunk_index.first;
+            indexes_in_chunk[dim] = chunk_index.second;
+        }
+        return std::make_pair(indexes_of_chunk, indexes_in_chunk);
+    }
+}
+
+#endif

+ 378 - 0
3rd/numpy/include/xtensor/xchunked_assign.hpp

@@ -0,0 +1,378 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_CHUNKED_ASSIGN_HPP
+#define XTENSOR_CHUNKED_ASSIGN_HPP
+
+#include "xnoalias.hpp"
+#include "xstrided_view.hpp"
+
+namespace xt
+{
+
+    /*******************
+     * xchunk_assigner *
+     *******************/
+
+    // Helper that assigns an arbitrary expression to a chunked destination by
+    // first materializing it into a temporary of type T (the chunked temporary).
+    template <class T, class chunk_storage>
+    class xchunked_assigner
+    {
+    public:
+
+        using temporary_type = T;
+
+        // Builds a chunked temporary from e and move-assigns it into dst.
+        template <class E, class DST>
+        void build_and_assign_temporary(const xexpression<E>& e, DST& dst);
+    };
+
+    /*********************************
+     * xchunked_semantic declaration *
+     *********************************/
+
+    // CRTP base implementing assignment semantics for chunked containers:
+    // expressions are assigned chunk by chunk rather than element by element.
+    template <class D>
+    class xchunked_semantic : public xsemantic_base<D>
+    {
+    public:
+
+        using base_type = xsemantic_base<D>;
+        using derived_type = D;
+        using temporary_type = typename base_type::temporary_type;
+
+        // Chunk-wise assignment of an expression with a compatible shape.
+        template <class E>
+        derived_type& assign_xexpression(const xexpression<E>& e);
+
+        // Assignment of a computed expression; may fall back to a temporary.
+        template <class E>
+        derived_type& computed_assign(const xexpression<E>& e);
+
+        // Applies a scalar compound operation (e.g. +=) to every chunk.
+        template <class E, class F>
+        derived_type& scalar_computed_assign(const E& e, F&& f);
+
+    protected:
+
+        xchunked_semantic() = default;
+        ~xchunked_semantic() = default;
+
+        xchunked_semantic(const xchunked_semantic&) = default;
+        xchunked_semantic& operator=(const xchunked_semantic&) = default;
+
+        xchunked_semantic(xchunked_semantic&&) = default;
+        xchunked_semantic& operator=(xchunked_semantic&&) = default;
+
+        // Assignment through a freshly-built chunked temporary.
+        template <class E>
+        derived_type& operator=(const xexpression<E>& e);
+
+    private:
+
+        // Returns the assigner matching the chunk storage type CS.
+        template <class CS>
+        xchunked_assigner<temporary_type, CS> get_assigner(const CS&) const;
+    };
+
+    /*******************
+     * xchunk_iterator *
+     *******************/
+
+    template <class CS>
+    class xchunked_array;
+
+    template <class E>
+    class xchunked_view;
+
+    namespace detail
+    {
+        // Trait: true when T is an xchunked_array specialization.
+        template <class T>
+        struct is_xchunked_array : std::false_type
+        {
+        };
+
+        template <class CS>
+        struct is_xchunked_array<xchunked_array<CS>> : std::true_type
+        {
+        };
+
+        // Trait: true when T is an xchunked_view specialization.
+        template <class T>
+        struct is_xchunked_view : std::false_type
+        {
+        };
+
+        template <class E>
+        struct is_xchunked_view<xchunked_view<E>> : std::true_type
+        {
+        };
+
+        // Fallback base: selected when E is neither a chunked array nor a
+        // chunked view, leaving xchunk_iterator without a get_chunk method.
+        struct invalid_chunk_iterator
+        {
+        };
+
+        // Chunk access policy for xchunked_array: the i-th chunk is fetched
+        // directly from the array's chunk storage; the slice vector is unused.
+        template <class A>
+        struct xchunk_iterator_array
+        {
+            using reference = decltype(*(std::declval<A>().chunks().begin()));
+
+            inline decltype(auto) get_chunk(A& arr, typename A::size_type i, const xstrided_slice_vector&) const
+            {
+                using difference_type = typename A::difference_type;
+                return *(arr.chunks().begin() + static_cast<difference_type>(i));
+            }
+        };
+
+        // Chunk access policy for xchunked_view: a chunk is a strided view
+        // into the underlying expression, described by the slice vector.
+        template <class V>
+        struct xchunk_iterator_view
+        {
+            using reference = decltype(xt::strided_view(
+                std::declval<V>().expression(),
+                std::declval<xstrided_slice_vector>()
+            ));
+
+            inline auto get_chunk(V& view, typename V::size_type, const xstrided_slice_vector& sv) const
+            {
+                return xt::strided_view(view.expression(), sv);
+            }
+        };
+
+        // Selects the chunk access policy matching the decayed type of T.
+        template <class T>
+        struct xchunk_iterator_base
+            : std::conditional_t<
+                  is_xchunked_array<std::decay_t<T>>::value,
+                  xchunk_iterator_array<T>,
+                  std::conditional_t<is_xchunked_view<std::decay_t<T>>::value, xchunk_iterator_view<T>, invalid_chunk_iterator>>
+        {
+        };
+    }
+
+    // Forward iterator over the chunks of a chunked expression (array or view).
+    // Keeps both a multi-dimensional chunk index and a linear chunk index, plus
+    // a cached slice vector describing the chunk's region in the full expression.
+    template <class E>
+    class xchunk_iterator : private detail::xchunk_iterator_base<E>
+    {
+    public:
+
+        using base_type = detail::xchunk_iterator_base<E>;
+        using self_type = xchunk_iterator<E>;
+        using size_type = typename E::size_type;
+        using shape_type = typename E::shape_type;
+        using slice_vector = xstrided_slice_vector;
+
+        using reference = typename base_type::reference;
+        using value_type = std::remove_reference_t<reference>;
+        using pointer = value_type*;
+        using difference_type = typename E::difference_type;
+        using iterator_category = std::forward_iterator_tag;
+
+
+        xchunk_iterator() = default;
+        xchunk_iterator(E& chunked_expression, shape_type&& chunk_index, size_type chunk_linear_index);
+
+        self_type& operator++();
+        self_type operator++(int);
+        decltype(auto) operator*() const;
+
+        // Equality compares the linear chunk index only.
+        bool operator==(const self_type& rhs) const;
+        bool operator!=(const self_type& rhs) const;
+
+        const shape_type& chunk_index() const;
+
+        // Slices locating the current chunk inside the full expression.
+        const slice_vector& get_slice_vector() const;
+        // Slices selecting the valid region inside the chunk (for edge chunks).
+        slice_vector get_chunk_slice_vector() const;
+
+    private:
+
+        // Recomputes the cached slice for dimension `index`.
+        void fill_slice_vector(size_type index);
+
+        E* p_chunked_expression;
+        shape_type m_chunk_index;
+        size_type m_chunk_linear_index;
+        xstrided_slice_vector m_slice_vector;
+    };
+
+    /************************************
+     * xchunked_semantic implementation *
+     ************************************/
+
+    // Materializes e into a chunked temporary (using dst's chunk shape) and
+    // move-assigns the temporary into dst.
+    template <class T, class CS>
+    template <class E, class DST>
+    inline void xchunked_assigner<T, CS>::build_and_assign_temporary(const xexpression<E>& e, DST& dst)
+    {
+        temporary_type tmp(e, CS(), dst.chunk_shape());
+        dst = std::move(tmp);
+    }
+
+    // Assigns e chunk by chunk. For edge chunks, whose region in e is smaller
+    // than the nominal chunk shape, only the valid part of the chunk is written.
+    template <class D>
+    template <class E>
+    inline auto xchunked_semantic<D>::assign_xexpression(const xexpression<E>& e) -> derived_type&
+    {
+        auto& d = this->derived_cast();
+        const auto& chunk_shape = d.chunk_shape();
+        size_t i = 0;
+        auto it_end = d.chunk_end();
+        for (auto it = d.chunk_begin(); it != it_end; ++it, ++i)
+        {
+            // rhs: the region of e covered by the current chunk.
+            auto rhs = strided_view(e.derived_cast(), it.get_slice_vector());
+            if (rhs.shape() != chunk_shape)
+            {
+                // Edge chunk: restrict the write to the chunk's valid region.
+                noalias(strided_view(*it, it.get_chunk_slice_vector())) = rhs;
+            }
+            else
+            {
+                noalias(*it) = rhs;
+            }
+        }
+
+        return this->derived_cast();
+    }
+
+    // If e would outgrow the destination, rebuild through a temporary
+    // (operator=); otherwise assign in place chunk by chunk.
+    // NOTE(review): `e.shape() > d.shape()` is a lexicographic container
+    // comparison, not an element-wise "any dimension larger" test — confirm
+    // against upstream xtensor whether this is the intended check.
+    template <class D>
+    template <class E>
+    inline auto xchunked_semantic<D>::computed_assign(const xexpression<E>& e) -> derived_type&
+    {
+        D& d = this->derived_cast();
+        if (e.derived_cast().dimension() > d.dimension() || e.derived_cast().shape() > d.shape())
+        {
+            return operator=(e);
+        }
+        else
+        {
+            return assign_xexpression(e);
+        }
+    }
+
+    // Applies the scalar compound operation f(e) to every chunk independently.
+    template <class D>
+    template <class E, class F>
+    inline auto xchunked_semantic<D>::scalar_computed_assign(const E& e, F&& f) -> derived_type&
+    {
+        for (auto& c : this->derived_cast().chunks())
+        {
+            c.scalar_computed_assign(e, f);
+        }
+        return this->derived_cast();
+    }
+
+    // Full assignment: builds a chunked temporary from e and moves it into
+    // the derived container via the storage-specific assigner.
+    template <class D>
+    template <class E>
+    inline auto xchunked_semantic<D>::operator=(const xexpression<E>& e) -> derived_type&
+    {
+        D& d = this->derived_cast();
+        get_assigner(d.chunks()).build_and_assign_temporary(e, d);
+        return d;
+    }
+
+    // The argument is used only to deduce the chunk storage type CS.
+    template <class D>
+    template <class CS>
+    inline auto xchunked_semantic<D>::get_assigner(const CS&) const -> xchunked_assigner<temporary_type, CS>
+    {
+        return xchunked_assigner<temporary_type, CS>();
+    }
+
+    /**********************************
+     * xchunk_iterator implementation *
+     **********************************/
+
+    // Builds an iterator positioned at the given chunk index and pre-computes
+    // the slice vector for every dimension.
+    template <class E>
+    inline xchunk_iterator<E>::xchunk_iterator(E& expression, shape_type&& chunk_index, size_type chunk_linear_index)
+        : p_chunked_expression(&expression)
+        , m_chunk_index(std::move(chunk_index))
+        , m_chunk_linear_index(chunk_linear_index)
+        , m_slice_vector(m_chunk_index.size())
+    {
+        for (size_type i = 0; i < m_chunk_index.size(); ++i)
+        {
+            fill_slice_vector(i);
+        }
+    }
+
+    // Advances to the next chunk in row-major order: increments the chunk
+    // index from the last dimension with carry, refreshing the cached slice
+    // only for the dimensions that changed. On the final increment only the
+    // linear index moves (to the past-the-end value).
+    template <class E>
+    inline xchunk_iterator<E>& xchunk_iterator<E>::operator++()
+    {
+        if (m_chunk_linear_index + 1u != p_chunked_expression->grid_size())
+        {
+            size_type i = p_chunked_expression->dimension();
+            while (i != 0)
+            {
+                --i;
+                if (m_chunk_index[i] + 1u == p_chunked_expression->grid_shape()[i])
+                {
+                    // Wrap this dimension and carry into the next outer one.
+                    m_chunk_index[i] = 0;
+                    fill_slice_vector(i);
+                }
+                else
+                {
+                    m_chunk_index[i] += 1;
+                    fill_slice_vector(i);
+                    break;
+                }
+            }
+        }
+        m_chunk_linear_index++;
+        return *this;
+    }
+
+    // Post-increment: returns the pre-increment copy.
+    template <class E>
+    inline xchunk_iterator<E> xchunk_iterator<E>::operator++(int)
+    {
+        xchunk_iterator<E> it = *this;
+        ++(*this);
+        return it;
+    }
+
+    // Dereference: fetches the current chunk through the base-class policy
+    // (direct chunk for arrays, strided view for chunked views).
+    template <class E>
+    inline decltype(auto) xchunk_iterator<E>::operator*() const
+    {
+        return base_type::get_chunk(*p_chunked_expression, m_chunk_linear_index, m_slice_vector);
+    }
+
+    // Two iterators compare equal when their linear chunk indices match.
+    template <class E>
+    inline bool xchunk_iterator<E>::operator==(const xchunk_iterator& other) const
+    {
+        return m_chunk_linear_index == other.m_chunk_linear_index;
+    }
+
+    // Negation of operator==.
+    template <class E>
+    inline bool xchunk_iterator<E>::operator!=(const xchunk_iterator& other) const
+    {
+        return !(*this == other);
+    }
+
+    // Slices locating the current chunk inside the full expression.
+    template <class E>
+    inline auto xchunk_iterator<E>::get_slice_vector() const -> const slice_vector&
+    {
+        return m_slice_vector;
+    }
+
+    // Returns the current multi-dimensional chunk index.
+    // `inline` added for consistency with every other member definition in
+    // this header (behavior is unchanged for templates).
+    template <class E>
+    inline auto xchunk_iterator<E>::chunk_index() const -> const shape_type&
+    {
+        return m_chunk_index;
+    }
+
+    // Slices selecting the valid region inside the current chunk: the full
+    // chunk extent, clamped to what remains of the expression in each
+    // dimension (smaller only for edge chunks).
+    template <class E>
+    inline auto xchunk_iterator<E>::get_chunk_slice_vector() const -> slice_vector
+    {
+        slice_vector slices(m_chunk_index.size());
+        for (size_type i = 0; i < m_chunk_index.size(); ++i)
+        {
+            size_type chunk_shape = p_chunked_expression->chunk_shape()[i];
+            size_type end = std::min(
+                chunk_shape,
+                p_chunked_expression->shape()[i] - m_chunk_index[i] * chunk_shape
+            );
+            slices[i] = range(0u, end);
+        }
+        return slices;
+    }
+
+    // Recomputes the cached slice for dimension i: the global index range
+    // covered by the current chunk, clamped to the expression's shape so that
+    // edge chunks do not run past the end.
+    template <class E>
+    inline void xchunk_iterator<E>::fill_slice_vector(size_type i)
+    {
+        size_type range_start = m_chunk_index[i] * p_chunked_expression->chunk_shape()[i];
+        size_type range_end = std::min(
+            (m_chunk_index[i] + 1) * p_chunked_expression->chunk_shape()[i],
+            p_chunked_expression->shape()[i]
+        );
+        m_slice_vector[i] = range(range_start, range_end);
+    }
+}
+
+#endif

+ 295 - 0
3rd/numpy/include/xtensor/xchunked_view.hpp

@@ -0,0 +1,295 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_CHUNKED_VIEW_HPP
+#define XTENSOR_CHUNKED_VIEW_HPP
+
+#include <xtl/xsequence.hpp>
+
+#include "xchunked_array.hpp"
+#include "xnoalias.hpp"
+#include "xstorage.hpp"
+#include "xstrided_view.hpp"
+
+namespace xt
+{
+
+    // Trait: true when E is a chunked expression, as reported by its chunk_helper.
+    template <class E>
+    struct is_chunked_t : detail::chunk_helper<E>::is_chunked
+    {
+    };
+
+    /*****************
+     * xchunked_view *
+     *****************/
+
+    template <class E>
+    class xchunk_iterator;
+
+    // Non-owning chunked view over an arbitrary expression: presents the
+    // expression as a grid of chunks of a given shape, each chunk accessed as
+    // a strided view. E may be a value or a reference type (set via as_chunked).
+    template <class E>
+    class xchunked_view
+    {
+    public:
+
+        using self_type = xchunked_view<E>;
+        using expression_type = std::decay_t<E>;
+        using value_type = typename expression_type::value_type;
+        using reference = typename expression_type::reference;
+        using const_reference = typename expression_type::const_reference;
+        using pointer = typename expression_type::pointer;
+        using const_pointer = typename expression_type::const_pointer;
+        using size_type = typename expression_type::size_type;
+        using difference_type = typename expression_type::difference_type;
+        using shape_type = svector<size_type>;
+        using chunk_iterator = xchunk_iterator<self_type>;
+        using const_chunk_iterator = xchunk_iterator<const self_type>;
+
+        // View with an explicit chunk shape; grid metadata is computed eagerly.
+        template <class OE, class S>
+        xchunked_view(OE&& e, S&& chunk_shape);
+
+        // View without a chunk shape: the chunk shape is adopted later, on
+        // assignment from a chunked expression (see operator=).
+        template <class OE>
+        xchunked_view(OE&& e);
+
+        // Computes grid shape and chunk count from m_shape / m_chunk_shape.
+        void init();
+
+        // Assignment from a non-chunked expression (chunk-wise strided copy).
+        template <class OE>
+        typename std::enable_if_t<!is_chunked_t<OE>::value, xchunked_view<E>&> operator=(const OE& e);
+
+        // Assignment from a chunked expression (adopts its chunk shape).
+        template <class OE>
+        typename std::enable_if_t<is_chunked_t<OE>::value, xchunked_view<E>&> operator=(const OE& e);
+
+        size_type dimension() const noexcept;
+        const shape_type& shape() const noexcept;
+        const shape_type& chunk_shape() const noexcept;
+        size_type grid_size() const noexcept;
+        const shape_type& grid_shape() const noexcept;
+
+        expression_type& expression() noexcept;
+        const expression_type& expression() const noexcept;
+
+        chunk_iterator chunk_begin();
+        chunk_iterator chunk_end();
+
+        const_chunk_iterator chunk_begin() const;
+        const_chunk_iterator chunk_end() const;
+        const_chunk_iterator chunk_cbegin() const;
+        const_chunk_iterator chunk_cend() const;
+
+    private:
+
+        E m_expression;
+        shape_type m_shape;
+        shape_type m_chunk_shape;
+        shape_type m_grid_shape;
+        size_type m_chunk_nb;
+    };
+
+    // Factory: wraps e in an xchunked_view with the given chunk shape.
+    template <class E, class S>
+    xchunked_view<E> as_chunked(E&& e, S&& chunk_shape);
+
+    /********************************
+     * xchunked_view implementation *
+     ********************************/
+
+    // Builds a chunked view with an explicit chunk shape and immediately
+    // computes the grid metadata.
+    // @param e           expression to view (forwarded into m_expression)
+    // @param chunk_shape nominal shape of each chunk
+    template <class E>
+    template <class OE, class S>
+    inline xchunked_view<E>::xchunked_view(OE&& e, S&& chunk_shape)
+        : m_expression(std::forward<OE>(e))
+        , m_chunk_shape(xtl::forward_sequence<shape_type, S>(chunk_shape))
+    {
+        // Read dimension/shape from m_expression, not from `e`: when OE is an
+        // rvalue, `e` has already been moved into m_expression above and is in
+        // a moved-from (valid but unspecified) state.
+        m_shape.resize(m_expression.dimension());
+        const auto& s = m_expression.shape();
+        std::copy(s.cbegin(), s.cend(), m_shape.begin());
+        init();
+    }
+
+    // Builds a chunked view without a chunk shape; the chunk shape is adopted
+    // later, when assigning from a chunked expression.
+    // @param e expression to view (forwarded into m_expression)
+    template <class E>
+    template <class OE>
+    inline xchunked_view<E>::xchunked_view(OE&& e)
+        : m_expression(std::forward<OE>(e))
+    {
+        // Read dimension/shape from m_expression, not from `e`: when OE is an
+        // rvalue, `e` has already been moved into m_expression above and is in
+        // a moved-from (valid but unspecified) state.
+        m_shape.resize(m_expression.dimension());
+        const auto& s = m_expression.shape();
+        std::copy(s.cbegin(), s.cend(), m_shape.begin());
+    }
+
+    // Computes the grid shape (chunks per dimension, ceiling division) and the
+    // total chunk count from m_shape and m_chunk_shape. Must be called after
+    // both shapes are set.
+    template <class E>
+    void xchunked_view<E>::init()
+    {
+        // compute chunk number in each dimension
+        m_grid_shape.resize(m_shape.size());
+        std::transform(
+            m_shape.cbegin(),
+            m_shape.cend(),
+            m_chunk_shape.cbegin(),
+            m_grid_shape.begin(),
+            [](auto s, auto cs)
+            {
+                std::size_t cn = s / cs;
+                if (s % cs > 0)
+                {
+                    cn++;  // edge_chunk
+                }
+                return cn;
+            }
+        );
+        // Total number of chunks = product of the grid extents.
+        m_chunk_nb = std::accumulate(
+            std::begin(m_grid_shape),
+            std::end(m_grid_shape),
+            std::size_t(1),
+            std::multiplies<>()
+        );
+    }
+
+    // Assignment from a non-chunked expression: each chunk of the view is
+    // filled from the corresponding strided region of e.
+    template <class E>
+    template <class OE>
+    typename std::enable_if_t<!is_chunked_t<OE>::value, xchunked_view<E>&>
+    xchunked_view<E>::operator=(const OE& e)
+    {
+        auto end = chunk_end();
+        for (auto it = chunk_begin(); it != end; ++it)
+        {
+            auto el = *it;
+            noalias(el) = strided_view(e, it.get_slice_vector());
+        }
+        return *this;
+    }
+
+    // Assignment from a chunked expression: adopts e's chunk shape, then
+    // copies chunk by chunk. For edge chunks, whose destination region is
+    // smaller than the source chunk, only the overlapping part is copied.
+    template <class E>
+    template <class OE>
+    typename std::enable_if_t<is_chunked_t<OE>::value, xchunked_view<E>&>
+    xchunked_view<E>::operator=(const OE& e)
+    {
+        m_chunk_shape.resize(e.dimension());
+        const auto& cs = e.chunk_shape();
+        std::copy(cs.cbegin(), cs.cend(), m_chunk_shape.begin());
+        init();
+        auto it2 = e.chunks().begin();
+        auto end1 = chunk_end();
+        for (auto it1 = chunk_begin(); it1 != end1; ++it1, ++it2)
+        {
+            auto el1 = *it1;
+            auto el2 = *it2;
+            auto lhs_shape = el1.shape();
+            if (lhs_shape != el2.shape())
+            {
+                xstrided_slice_vector esv(el2.dimension());  // element slice in edge chunk
+                std::transform(
+                    lhs_shape.begin(),
+                    lhs_shape.end(),
+                    esv.begin(),
+                    [](auto size)
+                    {
+                        return range(0, size);
+                    }
+                );
+                noalias(el1) = strided_view(el2, esv);
+            }
+            else
+            {
+                noalias(el1) = el2;
+            }
+        }
+        return *this;
+    }
+
+    // Number of dimensions of the viewed expression.
+    template <class E>
+    inline auto xchunked_view<E>::dimension() const noexcept -> size_type
+    {
+        return m_shape.size();
+    }
+
+    // Overall shape of the viewed expression.
+    template <class E>
+    inline auto xchunked_view<E>::shape() const noexcept -> const shape_type&
+    {
+        return m_shape;
+    }
+
+    // Nominal shape of a single chunk (edge chunks may be smaller).
+    template <class E>
+    inline auto xchunked_view<E>::chunk_shape() const noexcept -> const shape_type&
+    {
+        return m_chunk_shape;
+    }
+
+    // Total number of chunks (computed by init()).
+    template <class E>
+    inline auto xchunked_view<E>::grid_size() const noexcept -> size_type
+    {
+        return m_chunk_nb;
+    }
+
+    // Shape of the chunk grid (number of chunks per dimension).
+    template <class E>
+    inline auto xchunked_view<E>::grid_shape() const noexcept -> const shape_type&
+    {
+        return m_grid_shape;
+    }
+
+    // Access to the underlying (viewed) expression.
+    template <class E>
+    inline auto xchunked_view<E>::expression() noexcept -> expression_type&
+    {
+        return m_expression;
+    }
+
+    // Const access to the underlying (viewed) expression.
+    template <class E>
+    inline auto xchunked_view<E>::expression() const noexcept -> const expression_type&
+    {
+        return m_expression;
+    }
+
+    // Iterator over chunks, starting at chunk index (0, ..., 0).
+    template <class E>
+    inline auto xchunked_view<E>::chunk_begin() -> chunk_iterator
+    {
+        shape_type chunk_index(m_shape.size(), size_type(0));
+        return chunk_iterator(*this, std::move(chunk_index), 0u);
+    }
+
+    // Past-the-end chunk iterator: index = grid shape, linear index = grid_size().
+    template <class E>
+    inline auto xchunked_view<E>::chunk_end() -> chunk_iterator
+    {
+        return chunk_iterator(*this, shape_type(grid_shape()), grid_size());
+    }
+
+    // Const chunk iterator to the first chunk.
+    template <class E>
+    inline auto xchunked_view<E>::chunk_begin() const -> const_chunk_iterator
+    {
+        shape_type chunk_index(m_shape.size(), size_type(0));
+        return const_chunk_iterator(*this, std::move(chunk_index), 0u);
+    }
+
+    // Const past-the-end chunk iterator.
+    template <class E>
+    inline auto xchunked_view<E>::chunk_end() const -> const_chunk_iterator
+    {
+        return const_chunk_iterator(*this, shape_type(grid_shape()), grid_size());
+    }
+
+    // Explicitly-const alias of chunk_begin().
+    template <class E>
+    inline auto xchunked_view<E>::chunk_cbegin() const -> const_chunk_iterator
+    {
+        return chunk_begin();
+    }
+
+    // Explicitly-const alias of chunk_end().
+    template <class E>
+    inline auto xchunked_view<E>::chunk_cend() const -> const_chunk_iterator
+    {
+        return chunk_end();
+    }
+
+    // Factory: wraps e in a chunked view with the given chunk shape.
+    template <class E, class S>
+    inline xchunked_view<E> as_chunked(E&& e, S&& chunk_shape)
+    {
+        return xchunked_view<E>(std::forward<E>(e), std::forward<S>(chunk_shape));
+    }
+
+    // Factory: wraps e in a chunked view whose chunk shape is adopted later,
+    // on assignment from a chunked expression.
+    template <class E>
+    inline xchunked_view<E> as_chunked(E&& e)
+    {
+        return xchunked_view<E>(std::forward<E>(e));
+    }
+}
+
+#endif

+ 264 - 0
3rd/numpy/include/xtensor/xcomplex.hpp

@@ -0,0 +1,264 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_COMPLEX_HPP
+#define XTENSOR_COMPLEX_HPP
+
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xcomplex.hpp>
+
+#include "xtensor/xbuilder.hpp"
+#include "xtensor/xexpression.hpp"
+#include "xtensor/xoffset_view.hpp"
+
+namespace xt
+{
+
+    /**
+     * @defgroup xt_xcomplex
+     *
+     * Defined in ``xtensor/xcomplex.hpp``
+     */
+
+    /******************************
+     * real and imag declarations *
+     ******************************/
+
+    // Forward declarations: the dispatch helpers below are defined before
+    // the public real()/imag() definitions.
+    template <class E>
+    decltype(auto) real(E&& e) noexcept;
+
+    template <class E>
+    decltype(auto) imag(E&& e) noexcept;
+
+    /********************************
+     * real and imag implementation *
+     ********************************/
+
+    namespace detail
+    {
+        // Case: the expression's value_type is complex. real()/imag() are
+        // exposed as xoffset_views into the (real, imag) storage layout of
+        // std::complex.
+        template <bool iscomplex = true>
+        struct complex_helper
+        {
+            template <class E>
+            inline static auto real(E&& e) noexcept
+            {
+                using real_type = typename std::decay_t<E>::value_type::value_type;
+                return xoffset_view<xclosure_t<E>, real_type, 0>(std::forward<E>(e));
+            }
+
+            template <class E>
+            inline static auto imag(E&& e) noexcept
+            {
+                // The imaginary part sits sizeof(real_type) bytes after the real part.
+                using real_type = typename std::decay_t<E>::value_type::value_type;
+                return xoffset_view<xclosure_t<E>, real_type, sizeof(real_type)>(std::forward<E>(e));
+            }
+        };
+
+        // Case: real-valued expression. real() is the identity, imag() is an
+        // all-zeros expression with the same shape.
+        template <>
+        struct complex_helper<false>
+        {
+            template <class E>
+            inline static decltype(auto) real(E&& e) noexcept
+            {
+                return std::forward<E>(e);
+            }
+
+            template <class E>
+            inline static auto imag(E&& e) noexcept
+            {
+                return zeros<typename std::decay_t<E>::value_type>(e.shape());
+            }
+        };
+
+        // Case: the argument is an xexpression. Dispatch on whether its
+        // value_type is complex.
+        template <bool isexpression = true>
+        struct complex_expression_helper
+        {
+            template <class E>
+            inline static decltype(auto) real(E&& e) noexcept
+            {
+                return detail::complex_helper<xtl::is_complex<typename std::decay_t<E>::value_type>::value>::real(
+                    std::forward<E>(e)
+                );
+            }
+
+            template <class E>
+            inline static decltype(auto) imag(E&& e) noexcept
+            {
+                return detail::complex_helper<xtl::is_complex<typename std::decay_t<E>::value_type>::value>::imag(
+                    std::forward<E>(e)
+                );
+            }
+        };
+
+        // Case: plain (non-xexpression) values are delegated to xtl's scalar
+        // real/imag forwarding helpers.
+        template <>
+        struct complex_expression_helper<false>
+        {
+            template <class E>
+            inline static decltype(auto) real(E&& e) noexcept
+            {
+                return xtl::forward_real(std::forward<E>(e));
+            }
+
+            template <class E>
+            inline static decltype(auto) imag(E&& e) noexcept
+            {
+                return xtl::forward_imag(std::forward<E>(e));
+            }
+        };
+    }
+
+    /**
+     * Return an xt::xexpression representing the real part of the given expression.
+     *
+     * The returned expression either holds a const reference to @p e or a copy
+     * depending on whether @p e is an lvalue or an rvalue.
+     *
+     * @ingroup xt_xcomplex
+     * @param e the xt::xexpression
+     */
+    template <class E>
+    inline decltype(auto) real(E&& e) noexcept
+    {
+        // Dispatch: xexpressions go through the complex/real helper pair,
+        // non-expressions are forwarded to xtl's scalar helpers.
+        return detail::complex_expression_helper<is_xexpression<std::decay_t<E>>::value>::real(std::forward<E>(e
+        ));
+    }
+
+    /**
+     * Return an xt::xexpression representing the imaginary part of the given expression.
+     *
+     * The returned expression either holds a const reference to @p e or a copy
+     * depending on whether @p e is an lvalue or an rvalue.
+     *
+     * @ingroup xt_xcomplex
+     * @param e the xt::xexpression
+     */
+    template <class E>
+    inline decltype(auto) imag(E&& e) noexcept
+    {
+        // Same dispatch as xt::real above.
+        return detail::complex_expression_helper<is_xexpression<std::decay_t<E>>::value>::imag(std::forward<E>(e
+        ));
+    }
+
+// Generates a functor NAME##_fun exposing a scalar operator() and a SIMD
+// simd_apply overload, both forwarding to NS::NAME. The using-declaration
+// keeps the call unqualified so ADL can select batch overloads.
+#define UNARY_COMPLEX_FUNCTOR(NS, NAME)             \
+    struct NAME##_fun                               \
+    {                                               \
+        template <class T>                          \
+        constexpr auto operator()(const T& t) const \
+        {                                           \
+            using NS::NAME;                         \
+            return NAME(t);                         \
+        }                                           \
+                                                    \
+        template <class B>                          \
+        constexpr auto simd_apply(const B& t) const \
+        {                                           \
+            using NS::NAME;                         \
+            return NAME(t);                         \
+        }                                           \
+    }
+
+    namespace math
+    {
+        namespace detail
+        {
+            // Conjugate of a complex value: negate the imaginary part.
+            template <class T>
+            constexpr std::complex<T> conj_impl(const std::complex<T>& c)
+            {
+                return std::complex<T>(c.real(), -c.imag());
+            }
+
+            // Real scalars are promoted to std::complex with a zero imaginary
+            // part, matching std::conj's behavior for arithmetic arguments.
+            template <class T>
+            constexpr std::complex<T> conj_impl(const T& real)
+            {
+                return std::complex<T>(real, 0);
+            }
+
+#ifdef XTENSOR_USE_XSIMD
+            // SIMD batches delegate to xsimd's vectorized conjugate.
+            template <class T, class A>
+            xsimd::complex_batch_type_t<xsimd::batch<T, A>> conj_impl(const xsimd::batch<T, A>& z)
+            {
+                return xsimd::conj(z);
+            }
+#endif
+        }
+
+        // Functors consumed by xt::norm, xt::arg and xt::conj below.
+        UNARY_COMPLEX_FUNCTOR(std, norm);
+        UNARY_COMPLEX_FUNCTOR(std, arg);
+        UNARY_COMPLEX_FUNCTOR(detail, conj_impl);
+    }
+
+#undef UNARY_COMPLEX_FUNCTOR
+
+    /**
+     * Return an xt::xfunction evaluating to the complex conjugate of the given expression.
+     *
+     * Real-valued inputs are promoted to std::complex with a zero imaginary
+     * part (see math::detail::conj_impl).
+     *
+     * @ingroup xt_xcomplex
+     * @param e the xt::xexpression
+     */
+    template <class E>
+    inline auto conj(E&& e) noexcept
+    {
+        using functor = math::conj_impl_fun;
+        using type = xfunction<functor, const_xclosure_t<E>>;
+        return type(functor(), std::forward<E>(e));
+    }
+
+    /**
+     * Calculates the phase angle (in radians) elementwise for the complex numbers in @p e.
+     *
+     * @ingroup xt_xcomplex
+     * @param e the xt::xexpression
+     */
+    template <class E>
+    inline auto arg(E&& e) noexcept
+    {
+        // Elementwise functor wrapping std::arg.
+        using functor = math::arg_fun;
+        using type = xfunction<functor, const_xclosure_t<E>>;
+        return type(functor(), std::forward<E>(e));
+    }
+
+    /**
+     * Calculates the phase angle elementwise for the complex numbers in @p e.
+     *
+     * Note that this function might be slightly less performant than xt::arg.
+     *
+     * @ingroup xt_xcomplex
+     * @param e the xt::xexpression
+     * @param deg calculate angle in degrees instead of radians
+     */
+    template <class E>
+    inline auto angle(E&& e, bool deg = false) noexcept
+    {
+        using value_type = xtl::complex_value_type_t<typename std::decay_t<E>::value_type>;
+        // Scale factor converting radians to degrees when requested.
+        value_type multiplier = 1.0;
+        if (deg)
+        {
+            multiplier = value_type(180) / numeric_constants<value_type>::PI;
+        }
+        // multiplier is a trivially-copyable arithmetic scalar: std::move on
+        // it would still be a copy, so it is passed directly.
+        return arg(std::forward<E>(e)) * multiplier;
+    }
+
+    /**
+     * Calculates the squared magnitude elementwise for the complex numbers in @p e.
+     *
+     * Equivalent to ``xt::pow(xt::real(e), 2) + xt::pow(xt::imag(e), 2)``.
+     * @ingroup xt_xcomplex
+     * @param e the xt::xexpression
+     */
+    template <class E>
+    inline auto norm(E&& e) noexcept
+    {
+        // Elementwise functor wrapping std::norm.
+        using functor = math::norm_fun;
+        using type = xfunction<functor, const_xclosure_t<E>>;
+        return type(functor(), std::forward<E>(e));
+    }
+}
+#endif

+ 1192 - 0
3rd/numpy/include/xtensor/xcontainer.hpp

@@ -0,0 +1,1192 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_CONTAINER_HPP
+#define XTENSOR_CONTAINER_HPP
+
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <stdexcept>
+
+#include <xtl/xmeta_utils.hpp>
+#include <xtl/xsequence.hpp>
+
+#include "xaccessible.hpp"
+#include "xiterable.hpp"
+#include "xiterator.hpp"
+#include "xmath.hpp"
+#include "xoperation.hpp"
+#include "xstrides.hpp"
+#include "xtensor_config.hpp"
+#include "xtensor_forward.hpp"
+
+namespace xt
+{
+    // Iterable types plugged into xcontiguous_iterable for a container D:
+    // steppers operate directly on the (possibly const) derived container.
+    template <class D>
+    struct xcontainer_iterable_types
+    {
+        using inner_shape_type = typename xcontainer_inner_types<D>::inner_shape_type;
+        using stepper = xstepper<D>;
+        using const_stepper = xstepper<const D>;
+    };
+
+    namespace detail
+    {
+        // Extracts T::allocator_type when the storage type provides one.
+        template <class T>
+        struct allocator_type_impl
+        {
+            using type = typename T::allocator_type;
+        };
+
+        // std::array has no allocator_type; supply a placeholder so the
+        // xcontainer::allocator_type alias stays well-formed.
+        template <class T, std::size_t N>
+        struct allocator_type_impl<std::array<T, N>>
+        {
+            using type = std::allocator<T>;  // fake allocator for testing
+        };
+    }
+
+    template <class T>
+    using allocator_type_t = typename detail::allocator_type_impl<T>::type;
+
+    /**
+     * @class xcontainer
+     * @brief Base class for dense multidimensional containers.
+     *
+     * The xcontainer class defines the interface for dense multidimensional
+     * container classes. It does not embed any data container, this responsibility
+     * is delegated to the inheriting classes.
+     *
+     * @tparam D The derived type, i.e. the inheriting class for which xcontainer
+     *           provides the interface.
+     */
+    template <class D>
+    class xcontainer : public xcontiguous_iterable<D>,
+                       private xaccessible<D>
+    {
+    public:
+
+        using derived_type = D;
+
+        // Types are sourced from xcontainer_inner_types<D>, the trait the
+        // derived container specializes.
+        using inner_types = xcontainer_inner_types<D>;
+        using storage_type = typename inner_types::storage_type;
+        using allocator_type = allocator_type_t<std::decay_t<storage_type>>;
+        using value_type = typename storage_type::value_type;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+        using pointer = typename storage_type::pointer;
+        using const_pointer = typename storage_type::const_pointer;
+        using size_type = typename inner_types::size_type;
+        using difference_type = typename storage_type::difference_type;
+        using simd_value_type = xt_simd::simd_type<value_type>;
+        using bool_load_type = xt::bool_load_type<value_type>;
+
+        using shape_type = typename inner_types::shape_type;
+        using strides_type = typename inner_types::strides_type;
+        using backstrides_type = typename inner_types::backstrides_type;
+
+        using inner_shape_type = typename inner_types::inner_shape_type;
+        using inner_strides_type = typename inner_types::inner_strides_type;
+        using inner_backstrides_type = typename inner_types::inner_backstrides_type;
+
+        using iterable_base = xcontiguous_iterable<D>;
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+
+        using accessible_base = xaccessible<D>;
+
+        // The layout is considered contiguous whenever it is not dynamic.
+        static constexpr layout_type static_layout = inner_types::layout;
+        static constexpr bool contiguous_layout = static_layout != layout_type::dynamic;
+        using data_alignment = xt_simd::container_alignment_t<storage_type>;
+        using simd_type = xt_simd::simd_type<value_type>;
+
+        using linear_iterator = typename iterable_base::linear_iterator;
+        using const_linear_iterator = typename iterable_base::const_linear_iterator;
+        using reverse_linear_iterator = typename iterable_base::reverse_linear_iterator;
+        using const_reverse_linear_iterator = typename iterable_base::const_reverse_linear_iterator;
+
+        static_assert(static_layout != layout_type::any, "Container layout can never be layout_type::any!");
+
+        // Size, shape and strides.
+        size_type size() const noexcept;
+
+        XTENSOR_CONSTEXPR_RETURN size_type dimension() const noexcept;
+
+        XTENSOR_CONSTEXPR_RETURN const inner_shape_type& shape() const noexcept;
+        XTENSOR_CONSTEXPR_RETURN const inner_strides_type& strides() const noexcept;
+        XTENSOR_CONSTEXPR_RETURN const inner_backstrides_type& backstrides() const noexcept;
+
+        template <class T>
+        void fill(const T& value);
+
+        // Element access: operator() runs the optional index/dimension checks,
+        // unchecked() does not and requires exactly dimension() indices.
+        template <class... Args>
+        reference operator()(Args... args);
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+
+        template <class... Args>
+        reference unchecked(Args... args);
+
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+
+        using accessible_base::at;
+        using accessible_base::shape;
+        using accessible_base::operator[];
+        using accessible_base::back;
+        using accessible_base::front;
+        using accessible_base::in_bounds;
+        using accessible_base::periodic;
+
+        template <class It>
+        reference element(It first, It last);
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        // Access to the underlying 1-D storage buffer.
+        storage_type& storage() noexcept;
+        const storage_type& storage() const noexcept;
+
+        pointer data() noexcept;
+        const_pointer data() const noexcept;
+        // NOTE(review): top-level const on a by-value return type is ignored
+        // by the language; harmless, but plain size_type would be cleaner.
+        const size_type data_offset() const noexcept;
+
+        template <class S>
+        bool broadcast_shape(S& shape, bool reuse_cache = false) const;
+
+        template <class S>
+        bool has_linear_assign(const S& strides) const noexcept;
+        template <class S>
+        stepper stepper_begin(const S& shape) noexcept;
+        template <class S>
+        stepper stepper_end(const S& shape, layout_type l) noexcept;
+
+        template <class S>
+        const_stepper stepper_begin(const S& shape) const noexcept;
+        template <class S>
+        const_stepper stepper_end(const S& shape, layout_type l) const noexcept;
+
+        reference data_element(size_type i);
+        const_reference data_element(size_type i) const;
+
+        reference flat(size_type i);
+        const_reference flat(size_type i) const;
+
+        template <class requested_type>
+        using simd_return_type = xt_simd::simd_return_type<value_type, requested_type>;
+
+        template <class align, class simd>
+        void store_simd(size_type i, const simd& e);
+        template <class align, class requested_type = value_type, std::size_t N = xt_simd::simd_traits<requested_type>::size>
+        container_simd_return_type_t<storage_type, value_type, requested_type>
+        /*simd_return_type<requested_type>*/ load_simd(size_type i) const;
+
+        linear_iterator linear_begin() noexcept;
+        linear_iterator linear_end() noexcept;
+
+        const_linear_iterator linear_begin() const noexcept;
+        const_linear_iterator linear_end() const noexcept;
+        const_linear_iterator linear_cbegin() const noexcept;
+        const_linear_iterator linear_cend() const noexcept;
+
+        reverse_linear_iterator linear_rbegin() noexcept;
+        reverse_linear_iterator linear_rend() noexcept;
+
+        const_reverse_linear_iterator linear_rbegin() const noexcept;
+        const_reverse_linear_iterator linear_rend() const noexcept;
+        const_reverse_linear_iterator linear_crbegin() const noexcept;
+        const_reverse_linear_iterator linear_crend() const noexcept;
+
+        using container_iterator = linear_iterator;
+        using const_container_iterator = const_linear_iterator;
+
+    protected:
+
+        // Special members are protected: xcontainer is a CRTP base and must
+        // not be instantiated on its own.
+        xcontainer() = default;
+        ~xcontainer() = default;
+
+        xcontainer(const xcontainer&) = default;
+        xcontainer& operator=(const xcontainer&) = default;
+
+        xcontainer(xcontainer&&) = default;
+        xcontainer& operator=(xcontainer&&) = default;
+
+        container_iterator data_xbegin() noexcept;
+        const_container_iterator data_xbegin() const noexcept;
+        container_iterator data_xend(layout_type l, size_type offset) noexcept;
+        const_container_iterator data_xend(layout_type l, size_type offset) const noexcept;
+
+    protected:
+
+        derived_type& derived_cast() & noexcept;
+        const derived_type& derived_cast() const& noexcept;
+        derived_type derived_cast() && noexcept;
+
+    private:
+
+        template <class It>
+        It data_xend_impl(It begin, layout_type l, size_type offset) const noexcept;
+
+        inner_shape_type& mutable_shape();
+        inner_strides_type& mutable_strides();
+        inner_backstrides_type& mutable_backstrides();
+
+        template <class C>
+        friend class xstepper;
+
+        friend class xaccessible<D>;
+        friend class xconst_accessible<D>;
+    };
+
+    /**
+     * @class xstrided_container
+     * @brief Partial implementation of xcontainer that embeds the strides and the shape
+     *
+     * The xstrided_container class is a partial implementation of the xcontainer interface
+     * that embeds the strides and the shape of the multidimensional container. It does
+     * not embed the data container, this responsibility is delegated to the inheriting
+     * classes.
+     *
+     * @tparam D The derived type, i.e. the inheriting class for which xstrided_container
+     *           provides the partial implementation of xcontainer.
+     */
+    template <class D>
+    class xstrided_container : public xcontainer<D>
+    {
+    public:
+
+        using base_type = xcontainer<D>;
+        using storage_type = typename base_type::storage_type;
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using const_reference = typename base_type::const_reference;
+        using pointer = typename base_type::pointer;
+        using const_pointer = typename base_type::const_pointer;
+        using size_type = typename base_type::size_type;
+        using shape_type = typename base_type::shape_type;
+        using strides_type = typename base_type::strides_type;
+        using inner_shape_type = typename base_type::inner_shape_type;
+        using inner_strides_type = typename base_type::inner_strides_type;
+        using inner_backstrides_type = typename base_type::inner_backstrides_type;
+
+        // NOTE(review): resize presumably reallocates storage while reshape
+        // reinterprets existing data — confirm in the implementations, which
+        // are not visible in this header section.
+        template <class S = shape_type>
+        void resize(S&& shape, bool force = false);
+        template <class S = shape_type>
+        void resize(S&& shape, layout_type l);
+        template <class S = shape_type>
+        void resize(S&& shape, const strides_type& strides);
+
+        template <class S = shape_type>
+        auto& reshape(S&& shape, layout_type layout = base_type::static_layout) &;
+
+        template <class T>
+        auto& reshape(std::initializer_list<T> shape, layout_type layout = base_type::static_layout) &;
+
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+
+    protected:
+
+        xstrided_container() noexcept;
+        ~xstrided_container() = default;
+
+        xstrided_container(const xstrided_container&) = default;
+        xstrided_container& operator=(const xstrided_container&) = default;
+
+        xstrided_container(xstrided_container&&) = default;
+        xstrided_container& operator=(xstrided_container&&) = default;
+
+        explicit xstrided_container(inner_shape_type&&, inner_strides_type&&) noexcept;
+        explicit xstrided_container(inner_shape_type&&, inner_strides_type&&, inner_backstrides_type&&, layout_type&&) noexcept;
+
+        // Accessors through which xcontainer reaches the embedded metadata.
+        inner_shape_type& shape_impl() noexcept;
+        const inner_shape_type& shape_impl() const noexcept;
+
+        inner_strides_type& strides_impl() noexcept;
+        const inner_strides_type& strides_impl() const noexcept;
+
+        inner_backstrides_type& backstrides_impl() noexcept;
+        const inner_backstrides_type& backstrides_impl() const noexcept;
+
+        template <class S = shape_type>
+        void reshape_impl(S&& shape, std::true_type, layout_type layout = base_type::static_layout);
+        template <class S = shape_type>
+        void reshape_impl(S&& shape, std::false_type, layout_type layout = base_type::static_layout);
+
+        layout_type& mutable_layout() noexcept;
+
+    private:
+
+        inner_shape_type m_shape;
+        inner_strides_type m_strides;
+        inner_backstrides_type m_backstrides;
+        layout_type m_layout = base_type::static_layout;
+    };
+
+    /******************************
+     * xcontainer implementation *
+     ******************************/
+
+    template <class D>
+    template <class It>
+    inline It xcontainer<D>::data_xend_impl(It begin, layout_type l, size_type offset) const noexcept
+    {
+        // Delegates end-iterator computation to the free function strided_data_end.
+        return strided_data_end(*this, begin, l, offset);
+    }
+
+    // Mutable access to the shape/strides/backstrides stored by the derived class.
+    template <class D>
+    inline auto xcontainer<D>::mutable_shape() -> inner_shape_type&
+    {
+        return derived_cast().shape_impl();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::mutable_strides() -> inner_strides_type&
+    {
+        return derived_cast().strides_impl();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::mutable_backstrides() -> inner_backstrides_type&
+    {
+        return derived_cast().backstrides_impl();
+    }
+
+    /**
+     * @name Size and shape
+     */
+    //@{
+    /**
+     * Returns the number of elements in the container.
+     */
+    template <class D>
+    inline auto xcontainer<D>::size() const noexcept -> size_type
+    {
+        // Contiguous layouts: the storage size is the logical size.
+        // Otherwise compute the element count from the shape.
+        return contiguous_layout ? storage().size() : compute_size(shape());
+    }
+
+    /**
+     * Returns the number of dimensions of the container.
+     */
+    template <class D>
+    XTENSOR_CONSTEXPR_RETURN auto xcontainer<D>::dimension() const noexcept -> size_type
+    {
+        return shape().size();
+    }
+
+    /**
+     * Returns the shape of the container.
+     */
+    template <class D>
+    XTENSOR_CONSTEXPR_RETURN auto xcontainer<D>::shape() const noexcept -> const inner_shape_type&
+    {
+        return derived_cast().shape_impl();
+    }
+
+    /**
+     * Returns the strides of the container.
+     */
+    template <class D>
+    XTENSOR_CONSTEXPR_RETURN auto xcontainer<D>::strides() const noexcept -> const inner_strides_type&
+    {
+        return derived_cast().strides_impl();
+    }
+
+    /**
+     * Returns the backstrides of the container.
+     */
+    template <class D>
+    XTENSOR_CONSTEXPR_RETURN auto xcontainer<D>::backstrides() const noexcept -> const inner_backstrides_type&
+    {
+        return derived_cast().backstrides_impl();
+    }
+
+    //@}
+
+    /**
+     * @name Data
+     */
+    //@{
+
+    /**
+     * Fills the container with the given value.
+     * @param value the value to fill the container with.
+     */
+    template <class D>
+    template <class T>
+    inline void xcontainer<D>::fill(const T& value)
+    {
+        // A contiguous layout lets us fill the flat storage directly;
+        // otherwise walk the n-dimensional iteration order.
+        if (!contiguous_layout)
+        {
+            std::fill(this->begin(), this->end(), value);
+        }
+        else
+        {
+            std::fill(this->linear_begin(), this->linear_end(), value);
+        }
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the container.
+     * @param args a list of indices specifying the position in the container. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the container.
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xcontainer<D>::operator()(Args... args) -> reference
+    {
+        // Optional runtime index/dimension checks, then flatten the
+        // multi-index into a linear storage offset via the strides.
+        XTENSOR_TRY(check_index(shape(), args...));
+        XTENSOR_CHECK_DIMENSION(shape(), args...);
+        size_type index = xt::data_offset<size_type>(strides(), args...);
+        return storage()[index];
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the container.
+     * @param args a list of indices specifying the position in the container. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the container.
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xcontainer<D>::operator()(Args... args) const -> const_reference
+    {
+        XTENSOR_TRY(check_index(shape(), args...));
+        XTENSOR_CHECK_DIMENSION(shape(), args...);
+        size_type index = xt::data_offset<size_type>(strides(), args...);
+        return storage()[index];
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the container.
+     * @param args a list of indices specifying the position in the container. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the container, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xcontainer<D>::unchecked(Args... args) -> reference
+    {
+        // No bounds or dimension checks: the offset is computed straight from
+        // the strides, assuming exactly one index per dimension.
+        size_type index = xt::unchecked_data_offset<size_type, static_layout>(
+            strides(),
+            static_cast<std::ptrdiff_t>(args)...
+        );
+        return storage()[index];
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the container.
+     * @param args a list of indices specifying the position in the container. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the container, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xcontainer<D>::unchecked(Args... args) const -> const_reference
+    {
+        size_type index = xt::unchecked_data_offset<size_type, static_layout>(
+            strides(),
+            static_cast<std::ptrdiff_t>(args)...
+        );
+        return storage()[index];
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the container.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater
+     * than the number of dimensions of the container.
+     */
+    template <class D>
+    template <class It>
+    inline auto xcontainer<D>::element(It first, It last) -> reference
+    {
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        return storage()[element_offset<size_type>(strides(), first, last)];
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the container.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater
+     * than the number of dimensions of the container.
+     */
+    template <class D>
+    template <class It>
+    inline auto xcontainer<D>::element(It first, It last) const -> const_reference
+    {
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        return storage()[element_offset<size_type>(strides(), first, last)];
+    }
+
+    /**
+     * Returns a reference to the buffer containing the elements of the container.
+     */
+    template <class D>
+    inline auto xcontainer<D>::storage() noexcept -> storage_type&
+    {
+        return derived_cast().storage_impl();
+    }
+
+    /**
+     * Returns a constant reference to the buffer containing the elements of the
+     * container.
+     */
+    template <class D>
+    inline auto xcontainer<D>::storage() const noexcept -> const storage_type&
+    {
+        return derived_cast().storage_impl();
+    }
+
+    /**
+     * Returns a pointer to the underlying array serving as element storage. The pointer
+     * is such that range [data(); data() + size()] is always a valid range, even if the
+     * container is empty (data() is not dereferenceable in that case)
+     */
+    template <class D>
+    inline auto xcontainer<D>::data() noexcept -> pointer
+    {
+        return storage().data();
+    }
+
+    /**
+     * Returns a constant pointer to the underlying array serving as element storage. The pointer
+     * is such that range [data(); data() + size()] is always a valid range, even if the
+     * container is empty (data() is not dereferenceable in that case)
+     */
+    template <class D>
+    inline auto xcontainer<D>::data() const noexcept -> const_pointer
+    {
+        return storage().data();
+    }
+
+    /**
+     * Returns the offset to the first element in the container.
+     * Always zero for dense containers: their first element is at the start
+     * of the storage buffer.
+     */
+    template <class D>
+    // NOTE(review): the top-level const on this by-value return type is
+    // ignored by the language; harmless, but plain size_type would be cleaner.
+    inline auto xcontainer<D>::data_offset() const noexcept -> const size_type
+    {
+        return size_type(0);
+    }
+
+    //@}
+
+    /**
+     * @name Broadcasting
+     */
+    //@{
+    /**
+     * Broadcast the shape of the container to the specified parameter.
+     * @param shape the result shape
+     * @param reuse_cache parameter for internal optimization
+     * @return a boolean indicating whether the broadcasting is trivial
+     */
+    template <class D>
+    template <class S>
+    inline bool xcontainer<D>::broadcast_shape(S& shape, bool) const
+    {
+        // The cache flag is intentionally unused here: broadcasting a plain
+        // container is a pure shape computation.
+        return xt::broadcast_shape(this->shape(), shape);
+    }
+
+    /**
+     * Checks whether the xcontainer can be linearly assigned to an expression
+     * with the specified strides.
+     * @return a boolean indicating whether a linear assign is possible
+     */
+    template <class D>
+    template <class S>
+    inline bool xcontainer<D>::has_linear_assign(const S& str) const noexcept
+    {
+        // Linear assignment requires both stride sequences to match exactly
+        // (same length and same values).
+        return str.size() == strides().size() && std::equal(str.cbegin(), str.cend(), strides().begin());
+    }
+
+    //@}
+
+    // CRTP downcast to the derived container type (const lvalue overload).
+    template <class D>
+    inline auto xcontainer<D>::derived_cast() const& noexcept -> const derived_type&
+    {
+        return *static_cast<const derived_type*>(this);
+    }
+
+    // CRTP downcast for rvalue *this; returns the derived object by value.
+    // NOTE(review): `*static_cast<derived_type*>(this)` is an lvalue, so this return
+    // copies rather than moves the derived object — confirm this is intentional upstream.
+    template <class D>
+    inline auto xcontainer<D>::derived_cast() && noexcept -> derived_type
+    {
+        return *static_cast<derived_type*>(this);
+    }
+
+    // Unchecked access to the i-th element of the underlying linear storage.
+    template <class D>
+    inline auto xcontainer<D>::data_element(size_type i) -> reference
+    {
+        return storage()[i];
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::data_element(size_type i) const -> const_reference
+    {
+        return storage()[i];
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the container
+     * storage (as if it was one dimensional).
+     * @param i index specifying the position in the storage.
+     * Must be smaller than the number of elements in the container.
+     */
+    template <class D>
+    inline auto xcontainer<D>::flat(size_type i) -> reference
+    {
+        XTENSOR_ASSERT(i < size());
+        return storage()[i];
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the container
+     * storage (as if it was one dimensional).
+     * @param i index specifying the position in the storage.
+     * Must be smaller than the number of elements in the container.
+     */
+    template <class D>
+    inline auto xcontainer<D>::flat(size_type i) const -> const_reference
+    {
+        XTENSOR_ASSERT(i < size());
+        return storage()[i];
+    }
+
+    /***************
+     * stepper api *
+     ***************/
+
+    template <class D>
+    template <class S>
+    inline auto xcontainer<D>::stepper_begin(const S& shape) noexcept -> stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return stepper(static_cast<derived_type*>(this), data_xbegin(), offset);
+    }
+
+    template <class D>
+    template <class S>
+    inline auto xcontainer<D>::stepper_end(const S& shape, layout_type l) noexcept -> stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return stepper(static_cast<derived_type*>(this), data_xend(l, offset), offset);
+    }
+
+    template <class D>
+    template <class S>
+    inline auto xcontainer<D>::stepper_begin(const S& shape) const noexcept -> const_stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return const_stepper(static_cast<const derived_type*>(this), data_xbegin(), offset);
+    }
+
+    template <class D>
+    template <class S>
+    inline auto xcontainer<D>::stepper_end(const S& shape, layout_type l) const noexcept -> const_stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return const_stepper(static_cast<const derived_type*>(this), data_xend(l, offset), offset);
+    }
+
+    // Raw storage iterator to the first element, used by the stepper machinery.
+    template <class D>
+    inline auto xcontainer<D>::data_xbegin() noexcept -> container_iterator
+    {
+        return storage().begin();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::data_xbegin() const noexcept -> const_container_iterator
+    {
+        return storage().cbegin();
+    }
+
+    // Raw storage iterator to the end position for layout `l`; the exact position
+    // is computed by data_xend_impl from the begin iterator and the dimension offset.
+    template <class D>
+    inline auto xcontainer<D>::data_xend(layout_type l, size_type offset) noexcept -> container_iterator
+    {
+        return data_xend_impl(storage().begin(), l, offset);
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::data_xend(layout_type l, size_type offset) const noexcept
+        -> const_container_iterator
+    {
+        return data_xend_impl(storage().cbegin(), l, offset);
+    }
+
+    // Stores the SIMD batch `e` at linear position `i` of the storage.
+    // The effective alignment mode is derived from the requested `alignment`
+    // and the storage's own data alignment.
+    template <class D>
+    template <class alignment, class simd>
+    inline void xcontainer<D>::store_simd(size_type i, const simd& e)
+    {
+        using align_mode = driven_align_mode_t<alignment, data_alignment>;
+        xt_simd::store_as(std::addressof(storage()[i]), e, align_mode());
+    }
+
+    // Loads a SIMD batch of `requested_type` starting at linear position `i`.
+    template <class D>
+    template <class alignment, class requested_type, std::size_t N>
+    inline auto xcontainer<D>::load_simd(size_type i) const
+        -> container_simd_return_type_t<storage_type, value_type, requested_type>
+    {
+        using align_mode = driven_align_mode_t<alignment, data_alignment>;
+        return xt_simd::load_as<requested_type>(std::addressof(storage()[i]), align_mode());
+    }
+
+    // Linear (flat) iterator access over the underlying storage.
+    // The const overloads consistently use the c-prefixed container accessors
+    // (cbegin/cend/crbegin/crend); previously linear_begin() const, linear_rbegin() const
+    // and linear_rend() const called the non-c forms, which return the same const
+    // iterators on a const storage but were inconsistent with their siblings.
+    template <class D>
+    inline auto xcontainer<D>::linear_begin() noexcept -> linear_iterator
+    {
+        return storage().begin();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_end() noexcept -> linear_iterator
+    {
+        return storage().end();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_begin() const noexcept -> const_linear_iterator
+    {
+        return storage().cbegin();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_end() const noexcept -> const_linear_iterator
+    {
+        return storage().cend();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_cbegin() const noexcept -> const_linear_iterator
+    {
+        return storage().cbegin();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_cend() const noexcept -> const_linear_iterator
+    {
+        return storage().cend();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_rbegin() noexcept -> reverse_linear_iterator
+    {
+        return storage().rbegin();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_rend() noexcept -> reverse_linear_iterator
+    {
+        return storage().rend();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_rbegin() const noexcept -> const_reverse_linear_iterator
+    {
+        return storage().crbegin();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_rend() const noexcept -> const_reverse_linear_iterator
+    {
+        return storage().crend();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_crbegin() const noexcept -> const_reverse_linear_iterator
+    {
+        return storage().crbegin();
+    }
+
+    template <class D>
+    inline auto xcontainer<D>::linear_crend() const noexcept -> const_reverse_linear_iterator
+    {
+        return storage().crend();
+    }
+
+    // CRTP downcast to the derived container type (mutable lvalue overload).
+    template <class D>
+    inline auto xcontainer<D>::derived_cast() & noexcept -> derived_type&
+    {
+        return *static_cast<derived_type*>(this);
+    }
+
+    /*************************************
+     * xstrided_container implementation *
+     *************************************/
+
+    template <class D>
+    inline xstrided_container<D>::xstrided_container() noexcept
+        : base_type()
+    {
+        m_shape = xtl::make_sequence<inner_shape_type>(base_type::dimension(), 0);
+        m_strides = xtl::make_sequence<inner_strides_type>(base_type::dimension(), 0);
+        m_backstrides = xtl::make_sequence<inner_backstrides_type>(base_type::dimension(), 0);
+    }
+
+    template <class D>
+    inline xstrided_container<D>::xstrided_container(inner_shape_type&& shape, inner_strides_type&& strides) noexcept
+        : base_type()
+        , m_shape(std::move(shape))
+        , m_strides(std::move(strides))
+    {
+        m_backstrides = xtl::make_sequence<inner_backstrides_type>(m_shape.size(), 0);
+        adapt_strides(m_shape, m_strides, m_backstrides);
+    }
+
+    template <class D>
+    inline xstrided_container<D>::xstrided_container(
+        inner_shape_type&& shape,
+        inner_strides_type&& strides,
+        inner_backstrides_type&& backstrides,
+        layout_type&& layout
+    ) noexcept
+        : base_type()
+        , m_shape(std::move(shape))
+        , m_strides(std::move(strides))
+        , m_backstrides(std::move(backstrides))
+        , m_layout(std::move(layout))
+    {
+    }
+
+    // Accessors used by the xcontainer CRTP base to reach the stored
+    // shape / strides / backstrides sequences.
+    template <class D>
+    inline auto xstrided_container<D>::shape_impl() noexcept -> inner_shape_type&
+    {
+        return m_shape;
+    }
+
+    template <class D>
+    inline auto xstrided_container<D>::shape_impl() const noexcept -> const inner_shape_type&
+    {
+        return m_shape;
+    }
+
+    template <class D>
+    inline auto xstrided_container<D>::strides_impl() noexcept -> inner_strides_type&
+    {
+        return m_strides;
+    }
+
+    template <class D>
+    inline auto xstrided_container<D>::strides_impl() const noexcept -> const inner_strides_type&
+    {
+        return m_strides;
+    }
+
+    template <class D>
+    inline auto xstrided_container<D>::backstrides_impl() noexcept -> inner_backstrides_type&
+    {
+        return m_backstrides;
+    }
+
+    template <class D>
+    inline auto xstrided_container<D>::backstrides_impl() const noexcept -> const inner_backstrides_type&
+    {
+        return m_backstrides;
+    }
+
+    /**
+     * Return the layout_type of the container
+     * @return layout_type of the container
+     */
+    template <class D>
+    inline layout_type xstrided_container<D>::layout() const noexcept
+    {
+        return m_layout;
+    }
+
+    // Returns true when the elements are laid out contiguously in memory for the
+    // container's layout. Requires the storage itself to be contiguous, then checks
+    // that the innermost non-zero stride equals 1.
+    template <class D>
+    inline bool xstrided_container<D>::is_contiguous() const noexcept
+    {
+        using str_type = typename inner_strides_type::value_type;
+        auto is_zero = [](auto i)
+        {
+            return i == 0;
+        };
+        if (!is_contiguous_container<storage_type>::value)
+        {
+            return false;
+        }
+        // We need to make sure the inner-most non-zero stride is one.
+        // Trailing zero strides are ignored because they indicate broadcast dimensions.
+        if (m_layout == layout_type::row_major)
+        {
+            auto it = std::find_if_not(m_strides.rbegin(), m_strides.rend(), is_zero);
+            // If the array has strides of zero, it is a constant, and therefore contiguous.
+            return it == m_strides.rend() || *it == str_type(1);
+        }
+        else if (m_layout == layout_type::column_major)
+        {
+            auto it = std::find_if_not(m_strides.begin(), m_strides.end(), is_zero);
+            // If the array has strides of zero, it is a constant, and therefore contiguous.
+            return it == m_strides.end() || *it == str_type(1);
+        }
+        else
+        {
+            // Dynamic/other layouts: only a 0-d container (no strides) is contiguous.
+            return m_strides.empty();
+        }
+    }
+
+    namespace detail
+    {
+        // Resizes a mutable data container to `size` elements.
+        template <class C, class S>
+        inline void resize_data_container(C& c, S size)
+        {
+            xt::resize_container(c, size);
+        }
+
+        // Overload for const containers (e.g. const adaptors): cannot resize,
+        // only asserts (in debug builds) that the size already matches.
+        template <class C, class S>
+        inline void resize_data_container(const C& c, S size)
+        {
+            (void) c;  // remove unused parameter warning
+            (void) size;
+            XTENSOR_ASSERT_MSG(c.size() == size, "Trying to resize const data container with wrong size.");
+        }
+
+        // Generic shapes can always change their number of dimensions.
+        template <class S, class T>
+        constexpr bool check_resize_dimension(const S&, const T&)
+        {
+            return true;
+        }
+
+        // std::array-backed shapes (fixed-rank tensors) must keep the same rank.
+        template <class T, size_t N, class S>
+        constexpr bool check_resize_dimension(const std::array<T, N>&, const S& s)
+        {
+            return N == s.size();
+        }
+    }
+
+    /**
+     * Resizes the container.
+     * @warning Contrary to STL containers like std::vector, resize
+     * does NOT preserve the container elements.
+     * @param shape the new shape
+     * @param force force reshaping, even if the shape stays the same (default: false)
+     */
+    template <class D>
+    template <class S>
+    inline void xstrided_container<D>::resize(S&& shape, bool force)
+    {
+        XTENSOR_ASSERT_MSG(
+            detail::check_resize_dimension(m_shape, shape),
+            "cannot change the number of dimensions of xtensor"
+        )
+        std::size_t dim = shape.size();
+        if (m_shape.size() != dim || !std::equal(std::begin(shape), std::end(shape), std::begin(m_shape))
+            || force)
+        {
+            if (D::static_layout == layout_type::dynamic && m_layout == layout_type::dynamic)
+            {
+                m_layout = XTENSOR_DEFAULT_LAYOUT;  // fall back to default layout
+            }
+            m_shape = xtl::forward_sequence<shape_type, S>(shape);
+
+            resize_container(m_strides, dim);
+            resize_container(m_backstrides, dim);
+            size_type data_size = compute_strides<D::static_layout>(m_shape, m_layout, m_strides, m_backstrides);
+            detail::resize_data_container(this->storage(), data_size);
+        }
+    }
+
+    /**
+     * Resizes the container.
+     * @warning Contrary to STL containers like std::vector, resize
+     * does NOT preserve the container elements.
+     * @param shape the new shape
+     * @param l the new layout_type
+     */
+    template <class D>
+    template <class S>
+    inline void xstrided_container<D>::resize(S&& shape, layout_type l)
+    {
+        XTENSOR_ASSERT_MSG(
+            detail::check_resize_dimension(m_shape, shape),
+            "cannot change the number of dimensions of xtensor"
+        )
+        if (base_type::static_layout != layout_type::dynamic && l != base_type::static_layout)
+        {
+            XTENSOR_THROW(
+                std::runtime_error,
+                "Cannot change layout_type if template parameter not layout_type::dynamic."
+            );
+        }
+        m_layout = l;
+        resize(std::forward<S>(shape), true);
+    }
+
+    /**
+     * Resizes the container.
+     * @warning Contrary to STL containers like std::vector, resize
+     * does NOT preserve the container elements.
+     * @param shape the new shape
+     * @param strides the new strides
+     */
+    template <class D>
+    template <class S>
+    inline void xstrided_container<D>::resize(S&& shape, const strides_type& strides)
+    {
+        XTENSOR_ASSERT_MSG(
+            detail::check_resize_dimension(m_shape, shape),
+            "cannot change the number of dimensions of xtensor"
+        )
+        if (base_type::static_layout != layout_type::dynamic)
+        {
+            XTENSOR_THROW(
+                std::runtime_error,
+                "Cannot resize with custom strides when layout() is != layout_type::dynamic."
+            );
+        }
+        m_shape = xtl::forward_sequence<shape_type, S>(shape);
+        m_strides = strides;
+        resize_container(m_backstrides, m_strides.size());
+        adapt_strides(m_shape, m_strides, m_backstrides);
+        m_layout = layout_type::dynamic;
+        detail::resize_data_container(this->storage(), compute_size(m_shape));
+    }
+
+    /**
+     * Reshapes the container and keeps old elements. The `shape` argument can have one of its value
+     * equal to `-1`, in this case the value is inferred from the number of elements in the container
+     * and the remaining values in the `shape`.
+     * @code{.cpp}
+     * xt::xarray<int> a = { 1, 2, 3, 4, 5, 6, 7, 8 };
+     * a.reshape({-1, 4});
+     * //a.shape() is {2, 4}
+     * @endcode
+     * @param shape the new shape (has to have same number of elements as the original container)
+     * @param layout the layout to compute the strides (defaults to static layout of the container,
+     *               or for a container with dynamic layout to XTENSOR_DEFAULT_LAYOUT)
+     */
+    template <class D>
+    template <class S>
+    inline auto& xstrided_container<D>::reshape(S&& shape, layout_type layout) &
+    {
+        reshape_impl(
+            std::forward<S>(shape),
+            xtl::is_signed<std::decay_t<typename std::decay_t<S>::value_type>>(),
+            std::forward<layout_type>(layout)
+        );
+        return this->derived_cast();
+    }
+
+    template <class D>
+    template <class T>
+    inline auto& xstrided_container<D>::reshape(std::initializer_list<T> shape, layout_type layout) &
+    {
+        using sh_type = rebind_container_t<T, shape_type>;
+        sh_type sh = xtl::make_sequence<sh_type>(shape.size());
+        std::copy(shape.begin(), shape.end(), sh.begin());
+        reshape_impl(std::move(sh), xtl::is_signed<T>(), std::forward<layout_type>(layout));
+        return this->derived_cast();
+    }
+
+    // reshape for unsigned shape value types: no -1 marker possible, so the new
+    // shape must match the current number of elements exactly.
+    template <class D>
+    template <class S>
+    inline void
+    xstrided_container<D>::reshape_impl(S&& shape, std::false_type /* is unsigned */, layout_type layout)
+    {
+        if (compute_size(shape) != this->size())
+        {
+            XTENSOR_THROW(
+                std::runtime_error,
+                "Cannot reshape with incorrect number of elements. Do you mean to resize?"
+            );
+        }
+        if (D::static_layout == layout_type::dynamic && layout == layout_type::dynamic)
+        {
+            layout = XTENSOR_DEFAULT_LAYOUT;  // fall back to default layout
+        }
+        if (D::static_layout != layout_type::dynamic && layout != D::static_layout)
+        {
+            XTENSOR_THROW(std::runtime_error, "Cannot reshape with different layout if static layout != dynamic.");
+        }
+        m_layout = layout;
+        m_shape = xtl::forward_sequence<shape_type, S>(shape);
+        resize_container(m_strides, m_shape.size());
+        resize_container(m_backstrides, m_shape.size());
+        // Recompute strides for the new shape; the storage itself is untouched.
+        compute_strides<D::static_layout>(m_shape, m_layout, m_strides, m_backstrides);
+    }
+
+    // reshape for signed shape value types: a single -1 entry is replaced by the
+    // dimension inferred from the container size and the remaining entries.
+    // NOTE(review): `this->size() % new_size` assumes compute_size yields a usable
+    // magnitude even when the shape contains -1 — confirm against compute_size's
+    // definition. Also, the assert below cannot flag a second -1 when the first
+    // one sits at index 0 (neg_idx stays 0, so !neg_idx remains true).
+    template <class D>
+    template <class S>
+    inline void
+    xstrided_container<D>::reshape_impl(S&& _shape, std::true_type /* is signed */, layout_type layout)
+    {
+        using tmp_value_type = typename std::decay_t<S>::value_type;
+        auto new_size = compute_size(_shape);
+        if (this->size() % new_size)
+        {
+            XTENSOR_THROW(std::runtime_error, "Negative axis size cannot be inferred. Shape mismatch.");
+        }
+        std::decay_t<S> shape = _shape;
+        tmp_value_type accumulator = 1;
+        std::size_t neg_idx = 0;
+        std::size_t i = 0;
+        // Record the position of the (single) -1 entry while accumulating the
+        // product of all entries; a negative product signals that -1 was present.
+        for (auto it = shape.begin(); it != shape.end(); ++it, i++)
+        {
+            auto&& dim = *it;
+            if (dim < 0)
+            {
+                XTENSOR_ASSERT(dim == -1 && !neg_idx);
+                neg_idx = i;
+            }
+            accumulator *= dim;
+        }
+        if (accumulator < 0)
+        {
+            // Replace the -1 entry with the inferred dimension.
+            shape[neg_idx] = static_cast<tmp_value_type>(this->size()) / std::abs(accumulator);
+        }
+        else if (this->size() != new_size)
+        {
+            XTENSOR_THROW(
+                std::runtime_error,
+                "Cannot reshape with incorrect number of elements. Do you mean to resize?"
+            );
+        }
+        m_layout = layout;
+        m_shape = xtl::forward_sequence<shape_type, S>(shape);
+        resize_container(m_strides, m_shape.size());
+        resize_container(m_backstrides, m_shape.size());
+        compute_strides<D::static_layout>(m_shape, m_layout, m_strides, m_backstrides);
+    }
+
+    // Grants internal code mutable access to the stored layout flag.
+    template <class D>
+    inline auto xstrided_container<D>::mutable_layout() noexcept -> layout_type&
+    {
+        return m_layout;
+    }
+}
+
+#endif

+ 808 - 0
3rd/numpy/include/xtensor/xdynamic_view.hpp

@@ -0,0 +1,808 @@
+/***************************************************************************
+ * Copyright (c) 2016, Johan Mabille, Sylvain Corlay and Wolf Vollprecht    *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_DYNAMIC_VIEW_HPP
+#define XTENSOR_DYNAMIC_VIEW_HPP
+
+#include <xtl/xsequence.hpp>
+#include <xtl/xvariant.hpp>
+
+#include "xexpression.hpp"
+#include "xiterable.hpp"
+#include "xlayout.hpp"
+#include "xsemantic.hpp"
+#include "xstrided_view_base.hpp"
+
+namespace xt
+{
+
+    template <class CT, class S, layout_type L, class FST>
+    class xdynamic_view;
+
+    // Inner-type specialization consumed by the xdynamic_view CRTP bases
+    // (semantic / accessible machinery).
+    template <class CT, class S, layout_type L, class FST>
+    struct xcontainer_inner_types<xdynamic_view<CT, S, L, FST>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using undecay_expression = CT;
+        using reference = inner_reference_t<undecay_expression>;
+        using const_reference = typename xexpression_type::const_reference;
+        using size_type = typename xexpression_type::size_type;
+        using shape_type = std::decay_t<S>;
+        using undecay_shape = S;
+        using storage_getter = FST;
+        using inner_storage_type = typename storage_getter::type;
+        using temporary_type = xarray<std::decay_t<typename xexpression_type::value_type>, xexpression_type::static_layout>;
+        static constexpr layout_type layout = L;
+    };
+
+    // Iterable-type specialization: shape/strides/backstrides all share the same
+    // sequence type, and iteration goes through generic indexed steppers.
+    template <class CT, class S, layout_type L, class FST>
+    struct xiterable_inner_types<xdynamic_view<CT, S, L, FST>>
+    {
+        using inner_shape_type = S;
+        using inner_strides_type = inner_shape_type;
+        using inner_backstrides_type = inner_shape_type;
+
+#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ == 8
+        // Workaround: force early trait instantiation to dodge a GCC 8 bug.
+        static constexpr auto
+            random_instantiation_var_for_gcc8_data_iface = has_data_interface<xdynamic_view<CT, S, L, FST>>::value;
+        static constexpr auto
+            random_instantiation_var_for_gcc8_has_strides = has_strides<xdynamic_view<CT, S, L, FST>>::value;
+#endif
+
+        // TODO: implement efficient stepper specific to the dynamic_view
+        using const_stepper = xindexed_stepper<const xdynamic_view<CT, S, L, FST>, true>;
+        using stepper = xindexed_stepper<xdynamic_view<CT, S, L, FST>, false>;
+    };
+
+    /****************************
+     * xdynamic_view extensions *
+     ****************************/
+
+    namespace extension
+    {
+        template <class Tag, class CT, class S, layout_type L, class FST>
+        struct xdynamic_view_base_impl;
+
+        template <class CT, class S, layout_type L, class FST>
+        struct xdynamic_view_base_impl<xtensor_expression_tag, CT, S, L, FST>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class CT, class S, layout_type L, class FST>
+        struct xdynamic_view_base : xdynamic_view_base_impl<xexpression_tag_t<CT>, CT, S, L, FST>
+        {
+        };
+
+        template <class CT, class S, layout_type L, class FST>
+        using xdynamic_view_base_t = typename xdynamic_view_base<CT, S, L, FST>::type;
+    }
+
+    /*****************
+     * xdynamic_view *
+     *****************/
+
+    namespace detail
+    {
+        template <class T>
+        class xfake_slice;
+    }
+
+    /**
+     * @class xdynamic_view
+     * @brief View on an xexpression built from a runtime-specified (dynamic) list of
+     * slices, including keep/drop slices. Combines strided-view storage access
+     * (private xstrided_view_base) with per-dimension slice adjustment.
+     *
+     * @tparam CT the closure type of the underlying expression
+     * @tparam S the shape sequence type
+     * @tparam L the static layout of the view
+     * @tparam FST the flat-storage getter used to access the underlying buffer
+     */
+    template <class CT, class S, layout_type L = layout_type::dynamic, class FST = detail::flat_storage_getter<CT, XTENSOR_DEFAULT_TRAVERSAL>>
+    class xdynamic_view : public xview_semantic<xdynamic_view<CT, S, L, FST>>,
+                          public xiterable<xdynamic_view<CT, S, L, FST>>,
+                          public extension::xdynamic_view_base_t<CT, S, L, FST>,
+                          private xstrided_view_base<xdynamic_view<CT, S, L, FST>>
+    {
+    public:
+
+        using self_type = xdynamic_view<CT, S, L, FST>;
+        using base_type = xstrided_view_base<self_type>;
+        using semantic_base = xview_semantic<self_type>;
+        using extension_base = extension::xdynamic_view_base_t<CT, S, L, FST>;
+        using expression_tag = typename extension_base::expression_tag;
+
+        using xexpression_type = typename base_type::xexpression_type;
+        using base_type::is_const;
+
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using const_reference = typename base_type::const_reference;
+        using pointer = typename base_type::pointer;
+        using const_pointer = typename base_type::const_pointer;
+        using size_type = typename base_type::size_type;
+        using difference_type = typename base_type::difference_type;
+
+        using inner_storage_type = typename base_type::inner_storage_type;
+        using storage_type = typename base_type::storage_type;
+
+        using iterable_base = xiterable<self_type>;
+        using inner_shape_type = typename iterable_base::inner_shape_type;
+        using inner_strides_type = typename base_type::inner_strides_type;
+        using inner_backstrides_type = typename base_type::inner_backstrides_type;
+
+        using shape_type = typename base_type::shape_type;
+        using strides_type = typename base_type::strides_type;
+        using backstrides_type = typename base_type::backstrides_type;
+
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+
+        using base_type::contiguous_layout;
+        using base_type::static_layout;
+
+        using temporary_type = typename xcontainer_inner_types<self_type>::temporary_type;
+        using base_index_type = xindex_type_t<shape_type>;
+
+        using simd_value_type = typename base_type::simd_value_type;
+        using bool_load_type = typename base_type::bool_load_type;
+
+        // Each dimension carries one slice: a fake slice for plain strided access,
+        // or a keep/drop slice requiring per-index offset adjustment.
+        using strides_vt = typename strides_type::value_type;
+        using slice_type = xtl::variant<detail::xfake_slice<strides_vt>, xkeep_slice<strides_vt>, xdrop_slice<strides_vt>>;
+        using slice_vector_type = std::vector<slice_type>;
+
+        template <class CTA, class SA>
+        xdynamic_view(
+            CTA&& e,
+            SA&& shape,
+            get_strides_t<S>&& strides,
+            std::size_t offset,
+            layout_type layout,
+            slice_vector_type&& slices,
+            get_strides_t<S>&& adj_strides
+        ) noexcept;
+
+        template <class E>
+        self_type& operator=(const xexpression<E>& e);
+
+        template <class E>
+        disable_xexpression<E, self_type>& operator=(const E& e);
+
+        using base_type::dimension;
+        using base_type::is_contiguous;
+        using base_type::layout;
+        using base_type::shape;
+        using base_type::size;
+
+        // Explicitly deleting strides method to avoid compilers complaining
+        // about not being able to call the strides method from xstrided_view_base
+        // private base
+        const inner_strides_type& strides() const noexcept = delete;
+
+        reference operator()();
+        const_reference operator()() const;
+
+        template <class... Args>
+        reference operator()(Args... args);
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+
+        template <class... Args>
+        reference unchecked(Args... args);
+
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+
+        reference flat(size_type index);
+        const_reference flat(size_type index) const;
+
+        using base_type::operator[];
+        using base_type::at;
+        using base_type::back;
+        using base_type::front;
+        using base_type::in_bounds;
+        using base_type::periodic;
+
+        template <class It>
+        reference element(It first, It last);
+
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        size_type data_offset() const noexcept;
+
+        // Explicitly deleting data methods so has_data_interface results
+        // to false instead of having compilers complaining about not being
+        // able to call the methods from the private base
+        value_type* data() noexcept = delete;
+        const value_type* data() const noexcept = delete;
+
+        using base_type::broadcast_shape;
+        using base_type::expression;
+        using base_type::storage;
+
+        template <class O>
+        bool has_linear_assign(const O& str) const noexcept;
+
+        template <class T>
+        void fill(const T& value);
+
+        // Stepper interface used by the assignment/iteration machinery.
+        template <class ST>
+        stepper stepper_begin(const ST& shape);
+        template <class ST>
+        stepper stepper_end(const ST& shape, layout_type l);
+
+        template <class ST>
+        const_stepper stepper_begin(const ST& shape) const;
+        template <class ST>
+        const_stepper stepper_end(const ST& shape, layout_type l) const;
+
+        using container_iterator = std::
+            conditional_t<is_const, typename storage_type::const_iterator, typename storage_type::iterator>;
+        using const_container_iterator = typename storage_type::const_iterator;
+
+        // Rebinds the view to another underlying expression type E.
+        template <class E>
+        using rebind_t = xdynamic_view<E, S, L, typename FST::template rebind_t<E>>;
+
+        template <class E>
+        rebind_t<E> build_view(E&& e) const;
+
+    private:
+
+        using offset_type = typename base_type::offset_type;
+
+        // Per-dimension slices and the strides used to adjust the flat offset
+        // for keep/drop-sliced dimensions.
+        slice_vector_type m_slices;
+        inner_strides_type m_adj_strides;
+
+        container_iterator data_xbegin() noexcept;
+        const_container_iterator data_xbegin() const noexcept;
+        container_iterator data_xend(layout_type l, size_type offset) noexcept;
+        const_container_iterator data_xend(layout_type l, size_type offset) const noexcept;
+
+        template <class It>
+        It data_xbegin_impl(It begin) const noexcept;
+
+        template <class It>
+        It data_xend_impl(It end, layout_type l, size_type offset) const noexcept;
+
+        void assign_temporary_impl(temporary_type&& tmp);
+
+        // Offset correction helpers: translate multi-dimensional indices through
+        // the stored slices into an adjusted flat offset.
+        template <class T, class... Args>
+        offset_type adjust_offset(offset_type offset, T idx, Args... args) const noexcept;
+        offset_type adjust_offset(offset_type offset) const noexcept;
+
+        template <class T, class... Args>
+        offset_type
+        adjust_offset_impl(offset_type offset, size_type idx_offset, T idx, Args... args) const noexcept;
+        offset_type adjust_offset_impl(offset_type offset, size_type idx_offset) const noexcept;
+
+        template <class It>
+        offset_type adjust_element_offset(offset_type offset, It first, It last) const noexcept;
+
+        template <class C>
+        friend class xstepper;
+        friend class xview_semantic<self_type>;
+        friend class xaccessible<self_type>;
+        friend class xconst_accessible<self_type>;
+    };
+
+    /**************************
+     * xdynamic_view builders *
+     **************************/
+
+    template <class T>
+    using xdynamic_slice = xtl::variant<
+        T,
+
+        xrange_adaptor<placeholders::xtuph, T, T>,
+        xrange_adaptor<T, placeholders::xtuph, T>,
+        xrange_adaptor<T, T, placeholders::xtuph>,
+
+        xrange_adaptor<T, placeholders::xtuph, placeholders::xtuph>,
+        xrange_adaptor<placeholders::xtuph, T, placeholders::xtuph>,
+        xrange_adaptor<placeholders::xtuph, placeholders::xtuph, T>,
+
+        xrange_adaptor<T, T, T>,
+        xrange_adaptor<placeholders::xtuph, placeholders::xtuph, placeholders::xtuph>,
+
+        xrange<T>,
+        xstepped_range<T>,
+
+        xkeep_slice<T>,
+        xdrop_slice<T>,
+
+        xall_tag,
+        xellipsis_tag,
+        xnewaxis_tag>;
+
+    using xdynamic_slice_vector = std::vector<xdynamic_slice<std::ptrdiff_t>>;
+
+    template <class E>
+    auto dynamic_view(E&& e, const xdynamic_slice_vector& slices);
+
+    /******************************
+     * xfake_slice implementation *
+     ******************************/
+
+    namespace detail
+    {
+        // Degenerate slice stored for axes that need no extra index adjustment:
+        // it maps every index to 0 and reports size 1 / step 0, so combined
+        // with an adjusted stride of 0 it contributes nothing to data offsets.
+        template <class T>
+        class xfake_slice : public xslice<xfake_slice<T>>
+        {
+        public:
+
+            using size_type = T;
+            using self_type = xfake_slice<T>;
+
+            xfake_slice() = default;
+
+            // Every index resolves to 0.
+            size_type operator()(size_type /*i*/) const noexcept
+            {
+                return size_type(0);
+            }
+
+            size_type size() const noexcept
+            {
+                return size_type(1);
+            }
+
+            size_type step_size() const noexcept
+            {
+                return size_type(0);
+            }
+
+            size_type step_size(std::size_t /*i*/, std::size_t /*n*/ = 1) const noexcept
+            {
+                return size_type(0);
+            }
+
+            // Identity: a fake slice does not remap indices.
+            size_type revert_index(std::size_t i) const noexcept
+            {
+                return i;
+            }
+
+            bool contains(size_type /*i*/) const noexcept
+            {
+                return true;
+            }
+
+            // All fake slices are interchangeable, hence always equal.
+            bool operator==(const self_type& /*rhs*/) const noexcept
+            {
+                return true;
+            }
+
+            bool operator!=(const self_type& /*rhs*/) const noexcept
+            {
+                return false;
+            }
+        };
+    }
+
+    /********************************
+     * xdynamic_view implementation *
+     ********************************/
+
+    // Builds a dynamic view over expression e. Besides the strided-view state
+    // forwarded to base_type (shape, strides, offset, layout), it stores the
+    // per-axis runtime slices and their "adjusted" strides, which are applied
+    // on top of the base strided offset at each access.
+    template <class CT, class S, layout_type L, class FST>
+    template <class CTA, class SA>
+    inline xdynamic_view<CT, S, L, FST>::xdynamic_view(
+        CTA&& e,
+        SA&& shape,
+        get_strides_t<S>&& strides,
+        std::size_t offset,
+        layout_type layout,
+        slice_vector_type&& slices,
+        get_strides_t<S>&& adj_strides
+    ) noexcept
+        : base_type(std::forward<CTA>(e), std::forward<SA>(shape), std::move(strides), offset, layout)
+        , m_slices(std::move(slices))
+        , m_adj_strides(std::move(adj_strides))
+    {
+    }
+
+    // Expression assignment: delegates to the view-semantic base, which
+    // evaluates e into the elements selected by this view.
+    template <class CT, class S, layout_type L, class FST>
+    template <class E>
+    inline auto xdynamic_view<CT, S, L, FST>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        return semantic_base::operator=(e);
+    }
+
+    // Scalar assignment: fills every element of the view with e.
+    template <class CT, class S, layout_type L, class FST>
+    template <class E>
+    inline auto xdynamic_view<CT, S, L, FST>::operator=(const E& e) -> disable_xexpression<E, self_type>&
+    {
+        std::fill(this->begin(), this->end(), e);
+        return *this;
+    }
+
+    // 0-argument access: returns the element at the view's slice-adjusted base
+    // offset (see data_offset()).
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::operator()() -> reference
+    {
+        return base_type::storage()[data_offset()];
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::operator()() const -> const_reference
+    {
+        return base_type::storage()[data_offset()];
+    }
+
+    // Indexed access: computes the raw strided index, then adds the per-axis
+    // slice contributions (adjust_offset). Bounds and dimension checks are
+    // active only when the XTENSOR_TRY / XTENSOR_CHECK_DIMENSION macros are
+    // enabled by the build configuration.
+    template <class CT, class S, layout_type L, class FST>
+    template <class... Args>
+    inline auto xdynamic_view<CT, S, L, FST>::operator()(Args... args) -> reference
+    {
+        XTENSOR_TRY(check_index(base_type::shape(), args...));
+        XTENSOR_CHECK_DIMENSION(base_type::shape(), args...);
+        offset_type offset = base_type::compute_index(args...);
+        offset = adjust_offset(offset, args...);
+        return base_type::storage()[static_cast<size_type>(offset)];
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class... Args>
+    inline auto xdynamic_view<CT, S, L, FST>::operator()(Args... args) const -> const_reference
+    {
+        XTENSOR_TRY(check_index(base_type::shape(), args...));
+        XTENSOR_CHECK_DIMENSION(base_type::shape(), args...);
+        offset_type offset = base_type::compute_index(args...);
+        offset = adjust_offset(offset, args...);
+        return base_type::storage()[static_cast<size_type>(offset)];
+    }
+
+    // Dynamic views conservatively report that no linear (flat) assignment is
+    // possible: the runtime slices can make the index mapping non-contiguous.
+    template <class CT, class S, layout_type L, class FST>
+    template <class O>
+    inline bool xdynamic_view<CT, S, L, FST>::has_linear_assign(const O&) const noexcept
+    {
+        return false;
+    }
+
+    // Unchecked indexed access: same offset computation as operator(), but
+    // without the bounds / dimension checks.
+    template <class CT, class S, layout_type L, class FST>
+    template <class... Args>
+    inline auto xdynamic_view<CT, S, L, FST>::unchecked(Args... args) -> reference
+    {
+        offset_type offset = base_type::compute_unchecked_index(args...);
+        // Fix: forward the computed raw offset to adjust_offset, exactly as
+        // operator() does. The previous code called adjust_offset(args...),
+        // which discarded the freshly computed offset and misused the first
+        // index as the offset argument.
+        offset = adjust_offset(offset, args...);
+        return base_type::storage()[static_cast<size_type>(offset)];
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class... Args>
+    inline auto xdynamic_view<CT, S, L, FST>::unchecked(Args... args) const -> const_reference
+    {
+        offset_type offset = base_type::compute_unchecked_index(args...);
+        // Fix: forward the computed raw offset (same defect as in the non-const
+        // overload above).
+        offset = adjust_offset(offset, args...);
+        return base_type::storage()[static_cast<size_type>(offset)];
+    }
+
+    // Flat access relative to the slice-adjusted base offset.
+    // NOTE(review): i is added directly to data_offset() without going through
+    // the strides or slices; this assumes a contiguous mapping for the range
+    // accessed — confirm against the callers before relying on it.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::flat(size_type i) -> reference
+    {
+        return base_type::storage()[data_offset() + i];
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::flat(size_type i) const -> const_reference
+    {
+        return base_type::storage()[data_offset() + i];
+    }
+
+    // Element access from an index iterator range [first, last): raw strided
+    // index plus the per-axis slice contributions (adjust_element_offset).
+    template <class CT, class S, layout_type L, class FST>
+    template <class It>
+    inline auto xdynamic_view<CT, S, L, FST>::element(It first, It last) -> reference
+    {
+        XTENSOR_TRY(check_element_index(base_type::shape(), first, last));
+        offset_type offset = base_type::compute_element_index(first, last);
+        offset = adjust_element_offset(offset, first, last);
+        return base_type::storage()[static_cast<size_type>(offset)];
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class It>
+    inline auto xdynamic_view<CT, S, L, FST>::element(It first, It last) const -> const_reference
+    {
+        XTENSOR_TRY(check_element_index(base_type::shape(), first, last));
+        offset_type offset = base_type::compute_element_index(first, last);
+        offset = adjust_element_offset(offset, first, last);
+        return base_type::storage()[static_cast<size_type>(offset)];
+    }
+
+    // Base data offset of the view: the strided-view offset plus the first
+    // axis' slice evaluated at index 0, scaled by its adjusted stride.
+    // NOTE(review): unconditionally reads m_slices[0] / m_adj_strides[0], so it
+    // assumes the view carries at least one slice — confirm for 0-d views.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::data_offset() const noexcept -> size_type
+    {
+        size_type offset = base_type::data_offset();
+        size_type sl_offset = xtl::visit(
+            [](const auto& sl)
+            {
+                return sl(size_type(0));
+            },
+            m_slices[0]
+        );
+        return offset + sl_offset * m_adj_strides[0];
+    }
+
+    // Assigns value to every element of the view through its linear iterators.
+    template <class CT, class S, layout_type L, class FST>
+    template <class T>
+    inline void xdynamic_view<CT, S, L, FST>::fill(const T& value)
+    {
+        return std::fill(this->linear_begin(), this->linear_end(), value);
+    }
+
+    // Stepper factories used by the iteration/assignment machinery. The offset
+    // passed to the stepper is the number of leading broadcast dimensions,
+    // i.e. shape.size() - dimension().
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST>
+    inline auto xdynamic_view<CT, S, L, FST>::stepper_begin(const ST& shape) -> stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return stepper(this, offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST>
+    inline auto xdynamic_view<CT, S, L, FST>::stepper_end(const ST& shape, layout_type /*l*/) -> stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return stepper(this, offset, true);
+    }
+
+    // Const counterparts returning const_stepper.
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST>
+    inline auto xdynamic_view<CT, S, L, FST>::stepper_begin(const ST& shape) const -> const_stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return const_stepper(this, offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST>
+    inline auto xdynamic_view<CT, S, L, FST>::stepper_end(const ST& shape, layout_type /*l*/) const
+        -> const_stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return const_stepper(this, offset, true);
+    }
+
+    // Rebinds copies of this view's state (shape, strides, slices, adjusted
+    // strides, offset, layout) onto another underlying expression e, producing
+    // an equivalent view over e.
+    template <class CT, class S, layout_type L, class FST>
+    template <class E>
+    inline auto xdynamic_view<CT, S, L, FST>::build_view(E&& e) const -> rebind_t<E>
+    {
+        inner_shape_type sh(this->shape());
+        inner_strides_type str(base_type::strides());
+        slice_vector_type svt(m_slices);
+        inner_strides_type adj_str(m_adj_strides);
+        return rebind_t<E>(
+            std::forward<E>(e),
+            std::move(sh),
+            std::move(str),
+            base_type::data_offset(),
+            this->layout(),
+            std::move(svt),
+            std::move(adj_str)
+        );
+    }
+
+    // Raw storage iterators positioned at the slice-adjusted base offset; used
+    // by the strided assignment machinery.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::data_xbegin() noexcept -> container_iterator
+    {
+        return data_xbegin_impl(this->storage().begin());
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::data_xbegin() const noexcept -> const_container_iterator
+    {
+        return data_xbegin_impl(this->storage().cbegin());
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::data_xend(layout_type l, size_type offset) noexcept
+        -> container_iterator
+    {
+        return data_xend_impl(this->storage().begin(), l, offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::data_xend(layout_type l, size_type offset) const noexcept
+        -> const_container_iterator
+    {
+        return data_xend_impl(this->storage().cbegin(), l, offset);
+    }
+
+    // Begin iterator: underlying storage begin advanced by the adjusted offset.
+    template <class CT, class S, layout_type L, class FST>
+    template <class It>
+    inline It xdynamic_view<CT, S, L, FST>::data_xbegin_impl(It begin) const noexcept
+    {
+        return begin + static_cast<std::ptrdiff_t>(data_offset());
+    }
+
+    // TODO: fix the data_xend implementation and assign_temporary_impl
+
+    // End iterator computed from the view's strides/layout by strided_data_end
+    // (see the TODO above: flagged upstream as needing rework).
+    template <class CT, class S, layout_type L, class FST>
+    template <class It>
+    inline It
+    xdynamic_view<CT, S, L, FST>::data_xend_impl(It begin, layout_type l, size_type offset) const noexcept
+    {
+        return strided_data_end(*this, begin + std::ptrdiff_t(data_offset()), l, offset);
+    }
+
+    // Element-wise copy of the evaluated temporary into the view.
+    template <class CT, class S, layout_type L, class FST>
+    inline void xdynamic_view<CT, S, L, FST>::assign_temporary_impl(temporary_type&& tmp)
+    {
+        std::copy(tmp.cbegin(), tmp.cend(), this->begin());
+    }
+
+    // Adds the per-axis slice contributions for an argument pack. When more
+    // indices than dimensions were supplied, the leading arguments are dropped
+    // recursively; otherwise the arguments are mapped onto the trailing axes
+    // starting at dim - nb_args.
+    template <class CT, class S, layout_type L, class FST>
+    template <class T, class... Args>
+    inline auto
+    xdynamic_view<CT, S, L, FST>::adjust_offset(offset_type offset, T idx, Args... args) const noexcept
+        -> offset_type
+    {
+        constexpr size_type nb_args = sizeof...(Args) + 1;
+        size_type dim = base_type::dimension();
+        offset_type res = nb_args > dim ? adjust_offset(offset, args...)
+                                        : adjust_offset_impl(offset, dim - nb_args, idx, args...);
+        return res;
+    }
+
+    // Recursion terminator: no indices left to adjust.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::adjust_offset(offset_type offset) const noexcept -> offset_type
+    {
+        return offset;
+    }
+
+    // Evaluates the slice of axis idx_offset at index idx (through the variant)
+    // and accumulates its contribution scaled by that axis' adjusted stride,
+    // then recurses on the next axis with the remaining indices.
+    template <class CT, class S, layout_type L, class FST>
+    template <class T, class... Args>
+    inline auto
+    xdynamic_view<CT, S, L, FST>::adjust_offset_impl(offset_type offset, size_type idx_offset, T idx, Args... args)
+        const noexcept -> offset_type
+    {
+        offset_type sl_offset = xtl::visit(
+            [idx](const auto& sl)
+            {
+                using type = typename std::decay_t<decltype(sl)>::size_type;
+                return sl(type(idx));
+            },
+            m_slices[idx_offset]
+        );
+        offset_type res = offset + sl_offset * m_adj_strides[idx_offset];
+        return adjust_offset_impl(res, idx_offset + 1, args...);
+    }
+
+    // Recursion terminator: all axes processed.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xdynamic_view<CT, S, L, FST>::adjust_offset_impl(offset_type offset, size_type) const noexcept
+        -> offset_type
+    {
+        return offset;
+    }
+
+    // Iterator-range counterpart of adjust_offset. Shorter index ranges skip
+    // the missing leading axes (loop_offset); longer ones skip the extra
+    // leading indices (idx_offset). Each remaining index is passed through its
+    // axis' slice and scaled by the matching adjusted stride.
+    template <class CT, class S, layout_type L, class FST>
+    template <class It>
+    inline auto
+    xdynamic_view<CT, S, L, FST>::adjust_element_offset(offset_type offset, It first, It last) const noexcept
+        -> offset_type
+    {
+        auto dst = std::distance(first, last);
+        offset_type dim = static_cast<offset_type>(dimension());
+        offset_type loop_offset = dst < dim ? dim - dst : offset_type(0);
+        offset_type idx_offset = dim < dst ? dst - dim : offset_type(0);
+        offset_type res = offset;
+        for (offset_type i = loop_offset; i < dim; ++i, ++first)
+        {
+            offset_type j = static_cast<offset_type>(first[idx_offset]);
+            offset_type sl_offset = xtl::visit(
+                [j](const auto& sl)
+                {
+                    return static_cast<offset_type>(sl(j));
+                },
+                m_slices[static_cast<std::size_t>(i)]
+            );
+            res += sl_offset * m_adj_strides[static_cast<std::size_t>(i)];
+        }
+        return res;
+    }
+
+    /*****************************************
+     * xdynamic_view builders implementation *
+     *****************************************/
+
+    namespace detail
+    {
+        // Policy mixin for strided_view_args that additionally records, for
+        // each output axis, the runtime slice to apply and the "adjusted"
+        // stride it must be scaled by when computing offsets.
+        template <class V>
+        struct adj_strides_policy
+        {
+            using slice_vector = V;
+            using strides_type = dynamic_shape<std::ptrdiff_t>;
+
+            slice_vector new_slices;
+            strides_type new_adj_strides;
+
+        protected:
+
+            // Resizes both per-axis containers to the output dimension count.
+            inline void resize(std::size_t size)
+            {
+                new_slices.resize(size);
+                new_adj_strides.resize(size);
+            }
+
+            // Axis without a keep/drop slice: store a fake slice paired with an
+            // adjusted stride of 0 so it contributes nothing to offsets.
+            inline void set_fake_slice(std::size_t idx)
+            {
+                new_slices[idx] = xfake_slice<std::ptrdiff_t>();
+                new_adj_strides[idx] = std::ptrdiff_t(0);
+            }
+
+            // Tries to consume slices[sl_idx] as a keep slice, then as a drop
+            // slice; returns whether either matched.
+            template <class ST, class S>
+            bool fill_args(
+                const xdynamic_slice_vector& slices,
+                std::size_t sl_idx,
+                std::size_t i,
+                std::size_t old_shape,
+                const ST& old_stride,
+                S& shape,
+                get_strides_t<S>& strides
+            )
+            {
+                return fill_args_impl<xkeep_slice<std::ptrdiff_t>>(
+                           slices,
+                           sl_idx,
+                           i,
+                           old_shape,
+                           old_stride,
+                           shape,
+                           strides
+                       )
+                       || fill_args_impl<xdrop_slice<std::ptrdiff_t>>(
+                           slices,
+                           sl_idx,
+                           i,
+                           old_shape,
+                           old_stride,
+                           shape,
+                           strides
+                       );
+            }
+
+            // If the variant holds an SL, normalize it against the old extent;
+            // the axis' regular stride becomes 0 and the old stride moves into
+            // the adjusted strides so the slice drives the offset at runtime.
+            template <class SL, class ST, class S>
+            bool fill_args_impl(
+                const xdynamic_slice_vector& slices,
+                std::size_t sl_idx,
+                std::size_t i,
+                std::size_t old_shape,
+                const ST& old_stride,
+                S& shape,
+                get_strides_t<S>& strides
+            )
+            {
+                auto* sl = xtl::get_if<SL>(&slices[sl_idx]);
+                if (sl != nullptr)
+                {
+                    new_slices[i] = *sl;
+                    auto& ns = xtl::get<SL>(new_slices[i]);
+                    ns.normalize(old_shape);
+                    shape[i] = static_cast<std::size_t>(ns.size());
+                    strides[i] = std::ptrdiff_t(0);
+                    new_adj_strides[i] = static_cast<std::ptrdiff_t>(old_stride);
+                }
+                return sl != nullptr;
+            }
+        };
+    }
+
+    // Builds an xdynamic_view over e from a runtime slice vector. Shape,
+    // strides, offset and layout are computed by strided_view_args, extended
+    // through adj_strides_policy with the per-axis slices / adjusted strides.
+    template <class E>
+    inline auto dynamic_view(E&& e, const xdynamic_slice_vector& slices)
+    {
+        using view_type = xdynamic_view<xclosure_t<E>, dynamic_shape<std::size_t>>;
+        using slice_vector = typename view_type::slice_vector_type;
+        using policy = detail::adj_strides_policy<slice_vector>;
+        detail::strided_view_args<policy> args;
+        args.fill_args(
+            e.shape(),
+            detail::get_strides<XTENSOR_DEFAULT_TRAVERSAL>(e),
+            detail::get_offset<XTENSOR_DEFAULT_TRAVERSAL>(e),
+            e.layout(),
+            slices
+        );
+        return view_type(
+            std::forward<E>(e),
+            std::move(args.new_shape),
+            std::move(args.new_strides),
+            args.new_offset,
+            args.new_layout,
+            std::move(args.new_slices),
+            std::move(args.new_adj_strides)
+        );
+    }
+}
+
+#endif

+ 177 - 0
3rd/numpy/include/xtensor/xeval.hpp

@@ -0,0 +1,177 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_EVAL_HPP
+#define XTENSOR_EVAL_HPP
+
+#include "xexpression_traits.hpp"
+#include "xshape.hpp"
+#include "xtensor_forward.hpp"
+
+namespace xt
+{
+
+    /**
+     * @defgroup xt_xeval
+     *
+     * Evaluation functions.
+     * Defined in ``xtensor/xeval.hpp``
+     */
+
+    namespace detail
+    {
+        // True when T is an evaluated xcontainer (already holds its data and
+        // needs no evaluation).
+        template <class T>
+        using is_container = std::is_base_of<xcontainer<std::remove_const_t<T>>, T>;
+    }
+
+    /**
+     * Force evaluation of xexpression.
+     *
+     * @code{.cpp}
+     * xt::xarray<double> a = {1, 2, 3, 4};
+     * auto&& b = xt::eval(a); // b is a reference to a, no copy!
+     * auto&& c = xt::eval(a + b); // c is xarray<double>, not an xexpression
+     * @endcode
+     *
+     * @ingroup xt_xeval
+     * @return xt::xarray or xt::xtensor depending on shape type
+     */
+    template <class T>
+    inline auto eval(T&& t) -> std::enable_if_t<detail::is_container<std::decay_t<T>>::value, T&&>
+    {
+        return std::forward<T>(t);
+    }
+
+    /// @cond DOXYGEN_INCLUDE_SFINAE
+    // Non-container expressions: evaluate into a concrete temporary (an xarray
+    // or xtensor, selected by temporary_type_t from the shape type).
+    template <class T>
+    inline auto eval(T&& t)
+        -> std::enable_if_t<!detail::is_container<std::decay_t<T>>::value, temporary_type_t<T>>
+    {
+        return std::forward<T>(t);
+    }
+
+    /// @endcond
+
+    namespace detail
+    {
+        /**********************************
+         * has_same_layout implementation *
+         **********************************/
+
+        // True when E's static layout matches L, or when L is layout_type::any
+        // (i.e. any layout is acceptable).
+        template <layout_type L = layout_type::any, class E>
+        constexpr bool has_same_layout()
+        {
+            return (std::decay_t<E>::static_layout == L) || (L == layout_type::any);
+        }
+
+        // Value-argument convenience overload.
+        template <layout_type L = layout_type::any, class E>
+        constexpr bool has_same_layout(E&&)
+        {
+            return has_same_layout<L, E>();
+        }
+
+        // True when E2's static layout matches E1's.
+        template <class E1, class E2>
+        constexpr bool has_same_layout(E1&&, E2&&)
+        {
+            return has_same_layout<std::decay_t<E1>::static_layout, E2>();
+        }
+
+        /*********************************
+         * has_fixed_dims implementation *
+         *********************************/
+
+        // True when E's shape type is a fixed-size array, i.e. the rank is
+        // known at compile time.
+        template <class E>
+        constexpr bool has_fixed_dims()
+        {
+            return detail::is_array<typename std::decay_t<E>::shape_type>::value;
+        }
+
+        template <class E>
+        constexpr bool has_fixed_dims(E&&)
+        {
+            return has_fixed_dims<E>();
+        }
+
+        /****************************************
+         * as_xarray_container_t implementation *
+         ****************************************/
+
+        // xarray target type for evaluating E with layout L (any -> default).
+        template <class E, layout_type L>
+        using as_xarray_container_t = xarray<typename std::decay_t<E>::value_type, layout_remove_any(L)>;
+
+        /*****************************************
+         * as_xtensor_container_t implementation *
+         *****************************************/
+
+        // xtensor target type for evaluating E: rank taken from the fixed-size
+        // shape type, layout L (any -> default).
+        template <class E, layout_type L>
+        using as_xtensor_container_t = xtensor<
+            typename std::decay_t<E>::value_type,
+            std::tuple_size<typename std::decay_t<E>::shape_type>::value,
+            layout_remove_any(L)>;
+    }
+
+    /**
+     * Force evaluation of xexpression not providing a data interface
+     * and convert to the required layout.
+     *
+     * @code{.cpp}
+     * xt::xarray<double, xt::layout_type::row_major> a = {1, 2, 3, 4};
+     *
+     * // take reference to a (no copy!)
+     * auto&& b = xt::as_strided(a);
+     *
+     * // xarray<double> with the required layout
+     * auto&& c = xt::as_strided<xt::layout_type::column_major>(a);
+     *
+     * // xexpression
+     * auto&& a_cast = xt::cast<int>(a);
+     *
+     * // xarray<int>, not an xexpression
+     * auto&& d = xt::as_strided(a_cast);
+     *
+     * // xarray<int> with the required layout
+     * auto&& e = xt::as_strided<xt::layout_type::column_major>(a_cast);
+     * @endcode
+     *
+     * @warning This function should be used in a local context only.
+     *          Returning the value returned by this function could lead to a dangling reference.
+     * @ingroup xt_xeval
+     * @return The expression when it already provides a data interface with the correct layout,
+     *         an evaluated xt::xarray or xt::xtensor depending on shape type otherwise.
+     */
+    // Pass-through: e already provides a data interface in the requested
+    // layout, so it is forwarded without evaluation.
+    template <layout_type L = layout_type::any, class E>
+    inline auto as_strided(E&& e)
+        -> std::enable_if_t<has_data_interface<std::decay_t<E>>::value && detail::has_same_layout<L, E>(), E&&>
+    {
+        return std::forward<E>(e);
+    }
+
+    /// @cond DOXYGEN_INCLUDE_SFINAE
+    // Compile-time rank known: evaluate into an xtensor of matching rank with
+    // the requested layout.
+    template <layout_type L = layout_type::any, class E>
+    inline auto as_strided(E&& e) -> std::enable_if_t<
+        (!(has_data_interface<std::decay_t<E>>::value && detail::has_same_layout<L, E>()))
+            && detail::has_fixed_dims<E>(),
+        detail::as_xtensor_container_t<E, L>>
+    {
+        return e;
+    }
+
+    // Otherwise: evaluate into an xarray with the requested layout.
+    template <layout_type L = layout_type::any, class E>
+    inline auto as_strided(E&& e) -> std::enable_if_t<
+        (!(has_data_interface<std::decay_t<E>>::value && detail::has_same_layout<L, E>()))
+            && (!detail::has_fixed_dims<E>()),
+        detail::as_xarray_container_t<E, L>>
+    {
+        return e;
+    }
+    /// @endcond
+}
+
+#endif

+ 373 - 0
3rd/numpy/include/xtensor/xexception.hpp

@@ -0,0 +1,373 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_EXCEPTION_HPP
+#define XTENSOR_EXCEPTION_HPP
+
+#include <iostream>
+#include <iterator>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+
+#include <xtl/xcompare.hpp>
+#include <xtl/xsequence.hpp>
+#include <xtl/xspan_impl.hpp>
+
+#include "xtensor_config.hpp"
+
+#ifdef __GNUC__
+#define XTENSOR_UNUSED_VARIABLE __attribute__((unused))
+#else
+#define XTENSOR_UNUSED_VARIABLE
+#endif
+
+namespace xt
+{
+    // Tag type (with the value xt::missing) used to explicitly mark trailing
+    // indices as absent in the access-checking helpers below.
+    struct missing_type
+    {
+    };
+
+    namespace
+    {
+        // One instance per translation unit (pre-C++17 header-constant idiom;
+        // XTENSOR_UNUSED_VARIABLE silences the unused warning).
+        missing_type XTENSOR_UNUSED_VARIABLE missing;
+    }
+
+    namespace detail
+    {
+        // True when the last type of the pack is missing_type.
+        template <class... Args>
+        struct last_type_is_missing_impl
+            : std::is_same<missing_type, xtl::mpl::back_t<xtl::mpl::vector<Args...>>>
+        {
+        };
+
+        // Empty pack: nothing is missing.
+        template <>
+        struct last_type_is_missing_impl<> : std::false_type
+        {
+        };
+
+        template <class... Args>
+        constexpr bool last_type_is_missing = last_type_is_missing_impl<Args...>::value;
+    }
+
+    /*******************
+     * broadcast_error *
+     *******************/
+
+    // Thrown when two shapes cannot be broadcast together.
+    class broadcast_error : public std::runtime_error
+    {
+    public:
+
+        explicit broadcast_error(const char* msg)
+            : std::runtime_error(msg)
+        {
+        }
+    };
+
+    // Out-of-line throw helper; the message detail depends on NDEBUG (see the
+    // implementations below).
+    template <class S1, class S2>
+    [[noreturn]] void throw_broadcast_error(const S1& lhs, const S2& rhs);
+
+    /*********************
+     * concatenate_error *
+     *********************/
+
+    // Thrown when shapes are incompatible for concatenation.
+    class concatenate_error : public std::runtime_error
+    {
+    public:
+
+        explicit concatenate_error(const char* msg)
+            : std::runtime_error(msg)
+        {
+        }
+    };
+
+    template <class S1, class S2>
+    [[noreturn]] void throw_concatenate_error(const S1& lhs, const S2& rhs);
+
+    /**********************************
+     * broadcast_error implementation *
+     **********************************/
+
+    namespace detail
+    {
+        // Formats "Incompatible dimension of arrays:" followed by both shapes.
+        // Note: the ostream_iterator delimiter leaves a trailing ", " after the
+        // last extent of each shape, e.g. "(2, 3, )".
+        template <class S1, class S2>
+        inline std::string shape_error_message(const S1& lhs, const S2& rhs)
+        {
+            std::ostringstream buf("Incompatible dimension of arrays:", std::ios_base::ate);
+
+            buf << "\n LHS shape = (";
+            using size_type1 = typename S1::value_type;
+            std::ostream_iterator<size_type1> iter1(buf, ", ");
+            std::copy(lhs.cbegin(), lhs.cend(), iter1);
+
+            buf << ")\n RHS shape = (";
+            using size_type2 = typename S2::value_type;
+            std::ostream_iterator<size_type2> iter2(buf, ", ");
+            std::copy(rhs.cbegin(), rhs.cend(), iter2);
+            buf << ")";
+
+            return buf.str();
+        }
+    }
+
+#ifdef NDEBUG
+    // Do not inline this function
+    // Release builds: skip shape formatting to keep the cold throw path small.
+    template <class S1, class S2>
+    [[noreturn]] void throw_broadcast_error(const S1&, const S2&)
+    {
+        XTENSOR_THROW(broadcast_error, "Incompatible dimension of arrays, compile in DEBUG for more info");
+    }
+#else
+    // Debug builds: include both shapes in the message.
+    template <class S1, class S2>
+    [[noreturn]] void throw_broadcast_error(const S1& lhs, const S2& rhs)
+    {
+        std::string msg = detail::shape_error_message(lhs, rhs);
+        XTENSOR_THROW(broadcast_error, msg.c_str());
+    }
+#endif
+
+    /************************************
+     * concatenate_error implementation *
+     ************************************/
+
+#ifdef NDEBUG
+    // Do not inline this function
+    template <class S1, class S2>
+    [[noreturn]] void throw_concatenate_error(const S1&, const S2&)
+    {
+        XTENSOR_THROW(concatenate_error, "Incompatible dimension of arrays, compile in DEBUG for more info");
+    }
+#else
+    template <class S1, class S2>
+    [[noreturn]] void throw_concatenate_error(const S1& lhs, const S2& rhs)
+    {
+        std::string msg = detail::shape_error_message(lhs, rhs);
+        XTENSOR_THROW(concatenate_error, msg.c_str());
+    }
+#endif
+
+    /*******************
+     * transpose_error *
+     *******************/
+
+    // Runtime error dedicated to transposition failures.
+    class transpose_error : public std::runtime_error
+    {
+    public:
+
+        explicit transpose_error(const char* msg)
+            : std::runtime_error(msg)
+        {
+        }
+    };
+
+    /***************
+     * check_index *
+     ***************/
+
+    // Forward declarations of the index-checking helpers defined below.
+    template <class S, class... Args>
+    void check_index(const S& shape, Args... args);
+
+    template <class S, class It>
+    void check_element_index(const S& shape, It first, It last);
+
+    namespace detail
+    {
+        // Recursion terminators: nothing left to check / trailing xt::missing.
+        template <class S, std::size_t dim>
+        inline void check_index_impl(const S&)
+        {
+        }
+
+        template <class S, std::size_t dim>
+        inline void check_index_impl(const S&, missing_type)
+        {
+        }
+
+        // Checks arg against shape[dim]; a size-1 axis accepts any index
+        // (broadcasting rule), then recurses on the next axis.
+        template <class S, std::size_t dim, class T, class... Args>
+        inline void check_index_impl(const S& shape, T arg, Args... args)
+        {
+            if (std::size_t(arg) >= std::size_t(shape[dim]) && shape[dim] != 1)
+            {
+                XTENSOR_THROW(
+                    std::out_of_range,
+                    "index " + std::to_string(arg) + " is out of bounds for axis " + std::to_string(dim)
+                        + " with size " + std::to_string(shape[dim])
+                );
+            }
+            check_index_impl<S, dim + 1>(shape, args...);
+        }
+    }
+
+    // Zero-argument / single-missing overloads: nothing to check.
+    template <class S>
+    inline void check_index(const S&)
+    {
+    }
+
+    template <class S>
+    inline void check_index(const S&, missing_type)
+    {
+    }
+
+    // General entry point. Mirrors the element-access rules: an exact argument
+    // count checks every axis; too many arguments drop leading ones; too few
+    // with a trailing xt::missing check the leading axes; too few otherwise
+    // are matched against the trailing axes.
+    template <class S, class Arg, class... Args>
+    inline void check_index(const S& shape, Arg arg, Args... args)
+    {
+        constexpr std::size_t nargs = sizeof...(Args) + 1;
+        if (nargs == shape.size())
+        {
+            detail::check_index_impl<S, 0>(shape, arg, args...);
+        }
+        else if (nargs > shape.size())
+        {
+            // Too many arguments: drop the first
+            check_index(shape, args...);
+        }
+        else if (detail::last_type_is_missing<Args...>)
+        {
+            // Too few arguments & last argument xt::missing: postfix index with zeros
+            detail::check_index_impl<S, 0>(shape, arg, args...);
+        }
+        else
+        {
+            // Too few arguments: ignore the beginning of the shape
+            auto it = shape.end() - nargs;
+            detail::check_index_impl<decltype(it), 0>(it, arg, args...);
+        }
+    }
+
+    // Iterator-range variant of check_index: validates the last
+    // min(shape.size(), distance) indices against the shape, again treating
+    // size-1 axes as broadcastable.
+    template <class S, class It>
+    inline void check_element_index(const S& shape, It first, It last)
+    {
+        using value_type = typename std::iterator_traits<It>::value_type;
+        using size_type = typename S::size_type;
+        auto dst = static_cast<size_type>(last - first);
+        It efirst = last - static_cast<std::ptrdiff_t>((std::min)(shape.size(), dst));
+        std::size_t axis = 0;
+
+        while (efirst != last)
+        {
+            if (*efirst >= value_type(shape[axis]) && shape[axis] != 1)
+            {
+                XTENSOR_THROW(
+                    std::out_of_range,
+                    "index " + std::to_string(*efirst) + " is out of bounds for axis " + std::to_string(axis)
+                        + " with size " + std::to_string(shape[axis])
+                );
+            }
+            ++efirst, ++axis;
+        }
+    }
+
+    /*******************
+     * check_dimension *
+     *******************/
+
+    // Throws std::out_of_range if more indices than dimensions were supplied.
+    template <class S, class... Args>
+    inline void check_dimension(const S& shape, Args...)
+    {
+        if (sizeof...(Args) > shape.size())
+        {
+            XTENSOR_THROW(
+                std::out_of_range,
+                "Number of arguments (" + std::to_string(sizeof...(Args))
+                    + ") is greater than the number of dimensions (" + std::to_string(shape.size()) + ")"
+            );
+        }
+    }
+
+    /*******************************
+     *  check_axis implementation  *
+     *******************************/
+
+    /**
+     * Check that `axis` (possibly negative, Python style) addresses an
+     * existing dimension, i.e. lies in the half-open range [-dim, dim).
+     * Throws std::out_of_range otherwise; `subject` customizes the message.
+     */
+    template <class A, class D>
+    inline void check_axis_in_dim(A axis, D dim, const char* subject = "Axis")
+    {
+        const auto sdim = static_cast<std::make_signed_t<D>>(dim);
+        // Safe mixed-signedness comparisons via xtl's cmp_* helpers.
+        const bool below_range = xtl::cmp_less(axis, -sdim);
+        const bool above_range = xtl::cmp_greater_equal(axis, dim);
+        if (below_range || above_range)
+        {
+            XTENSOR_THROW(
+                std::out_of_range,
+                std::string(subject) + " (" + std::to_string(axis)
+                    + ") is not within the number of dimensions (" + std::to_string(dim) + ')'
+            );
+        }
+    }
+
+    /****************
+     * check_access *
+     ****************/
+
+    // Combined bounds check used for element access: first ensure the number
+    // of indices does not exceed the number of dimensions, then range-check
+    // each index against the (broadcast-aware) shape.
+    template <class S, class... Args>
+    inline void check_access(const S& shape, Args... args)
+    {
+        check_dimension(shape, args...);
+        check_index(shape, args...);
+    }
+
+// XTENSOR_TRY(expr): when assertions are enabled and exceptions are not
+// disabled, evaluate `expr` and rethrow any std::exception as a
+// std::runtime_error prefixed with file:line; otherwise it expands to nothing.
+#if (defined(XTENSOR_ENABLE_ASSERT) && !defined(XTENSOR_DISABLE_EXCEPTIONS))
+#define XTENSOR_TRY(expr) XTENSOR_TRY_IMPL(expr, __FILE__, __LINE__)
+#define XTENSOR_TRY_IMPL(expr, file, line)                                                                \
+    try                                                                                                   \
+    {                                                                                                     \
+        expr;                                                                                             \
+    }                                                                                                     \
+    catch (std::exception & e)                                                                            \
+    {                                                                                                     \
+        XTENSOR_THROW(                                                                                    \
+            std::runtime_error,                                                                           \
+            std::string(file) + ':' + std::to_string(line) + ": check failed\n\t" + std::string(e.what()) \
+        );                                                                                                \
+    }
+#else
+#define XTENSOR_TRY(expr)
+#endif
+
+// XTENSOR_ASSERT(expr): opt-in runtime assertion that reports file:line and
+// the failed expression text; compiles to nothing unless XTENSOR_ENABLE_ASSERT.
+#ifdef XTENSOR_ENABLE_ASSERT
+#define XTENSOR_ASSERT(expr) XTENSOR_ASSERT_IMPL(expr, __FILE__, __LINE__)
+#define XTENSOR_ASSERT_IMPL(expr, file, line)                                                      \
+    if (!(expr))                                                                                   \
+    {                                                                                              \
+        XTENSOR_THROW(                                                                             \
+            std::runtime_error,                                                                    \
+            std::string(file) + ':' + std::to_string(line) + ": assertion failed (" #expr ") \n\t" \
+        );                                                                                         \
+    }
+#else
+#define XTENSOR_ASSERT(expr)
+#endif
+
+// XTENSOR_CHECK_DIMENSION: dimension-count check gated behind its own macro
+// switch; wrapped in XTENSOR_TRY so failures get file:line context.
+#ifdef XTENSOR_ENABLE_CHECK_DIMENSION
+#define XTENSOR_CHECK_DIMENSION(S, ARGS) XTENSOR_TRY(check_dimension(S, ARGS))
+#else
+#define XTENSOR_CHECK_DIMENSION(S, ARGS)
+#endif
+
+// XTENSOR_ASSERT_MSG: like XTENSOR_ASSERT but with a caller-supplied message.
+#ifdef XTENSOR_ENABLE_ASSERT
+#define XTENSOR_ASSERT_MSG(expr, msg)                                                                            \
+    if (!(expr))                                                                                                 \
+    {                                                                                                            \
+        XTENSOR_THROW(                                                                                           \
+            std::runtime_error,                                                                                  \
+            std::string("Assertion error!\n") + msg + "\n  " + __FILE__ + '(' + std::to_string(__LINE__) + ")\n" \
+        );                                                                                                       \
+    }
+#else
+#define XTENSOR_ASSERT_MSG(expr, msg)
+#endif
+
+// XTENSOR_PRECONDITION: always-on precondition check (not gated by
+// XTENSOR_ENABLE_ASSERT), reporting message plus file(line).
+#define XTENSOR_PRECONDITION(expr, msg)                                              \
+    if (!(expr))                                                                     \
+    {                                                                                \
+        XTENSOR_THROW(                                                               \
+            std::runtime_error,                                                      \
+            std::string("Precondition violation!\n") + msg + "\n  " + __FILE__ + '(' \
+                + std::to_string(__LINE__) + ")\n"                                   \
+        );                                                                           \
+    }
+}
+#endif  // XEXCEPTION_HPP

+ 769 - 0
3rd/numpy/include/xtensor/xexpression.hpp

@@ -0,0 +1,769 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_EXPRESSION_HPP
+#define XTENSOR_EXPRESSION_HPP
+
+#include <cstddef>
+#include <type_traits>
+#include <vector>
+
+#include <xtl/xclosure.hpp>
+#include <xtl/xmeta_utils.hpp>
+#include <xtl/xtype_traits.hpp>
+
+#include "xlayout.hpp"
+#include "xshape.hpp"
+#include "xtensor_forward.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+
+    /***************************
+     * xexpression declaration *
+     ***************************/
+
+    /**
+     * @class xexpression
+     * @brief Base class for xexpressions
+     *
+     * The xexpression class is the base class for all classes representing an expression
+     * that can be evaluated to a multidimensional container with tensor semantic.
+     * Functions that can apply to any xexpression regardless of its specific type should take a
+     * xexpression argument.
+     *
+     * @tparam E The derived type.
+     *
+     */
+    template <class D>
+    class xexpression
+    {
+    public:
+
+        using derived_type = D;
+
+        // CRTP downcasts to the most derived type. The rvalue-qualified
+        // overload returns by value so a temporary expression can be
+        // materialized.
+        derived_type& derived_cast() & noexcept;
+        const derived_type& derived_cast() const& noexcept;
+        derived_type derived_cast() && noexcept;
+
+    protected:
+
+        // Protected, non-virtual lifecycle: xexpression is a CRTP mixin and
+        // is never instantiated or destroyed through a base pointer.
+        xexpression() = default;
+        ~xexpression() = default;
+
+        xexpression(const xexpression&) = default;
+        xexpression& operator=(const xexpression&) = default;
+
+        xexpression(xexpression&&) = default;
+        xexpression& operator=(xexpression&&) = default;
+    };
+
+    /************************************
+     * xsharable_expression declaration *
+     ************************************/
+
+    template <class E>
+    class xshared_expression;
+
+    template <class E>
+    class xsharable_expression;
+
+    namespace detail
+    {
+        template <class E>
+        xshared_expression<E> make_xshared_impl(xsharable_expression<E>&&);
+    }
+
+    // Base class for expressions that can be promoted to an xshared_expression
+    // via make_xshared / share. The shared_ptr is cached so sharing the same
+    // object twice reuses a single control block.
+    template <class D>
+    class xsharable_expression : public xexpression<D>
+    {
+    protected:
+
+        xsharable_expression();
+        ~xsharable_expression() = default;
+
+        xsharable_expression(const xsharable_expression&) = default;
+        xsharable_expression& operator=(const xsharable_expression&) = default;
+
+        xsharable_expression(xsharable_expression&&) = default;
+        xsharable_expression& operator=(xsharable_expression&&) = default;
+
+    private:
+
+        // Filled lazily by detail::make_xshared_impl; nullptr until shared.
+        std::shared_ptr<D> p_shared;
+
+        friend xshared_expression<D> detail::make_xshared_impl<D>(xsharable_expression<D>&&);
+    };
+
+    /******************************
+     * xexpression implementation *
+     ******************************/
+
+    /**
+     * @name Downcast functions
+     */
+    //@{
+    /**
+     * Returns a reference to the actual derived type of the xexpression.
+     */
+    template <class D>
+    inline auto xexpression<D>::derived_cast() & noexcept -> derived_type&
+    {
+        return *static_cast<derived_type*>(this);
+    }
+
+    /**
+     * Returns a constant reference to the actual derived type of the xexpression.
+     */
+    template <class D>
+    inline auto xexpression<D>::derived_cast() const& noexcept -> const derived_type&
+    {
+        return *static_cast<const derived_type*>(this);
+    }
+
+    /**
+     * Returns the actual derived type of the xexpression by value.
+     * Note: the dereferenced expression is an lvalue here, so the returned
+     * value is copy-constructed, not moved, from the derived object.
+     */
+    template <class D>
+    inline auto xexpression<D>::derived_cast() && noexcept -> derived_type
+    {
+        return *static_cast<derived_type*>(this);
+    }
+
+    //@}
+
+    /***************************************
+     * xsharable_expression implementation *
+     ***************************************/
+
+    // Default-construct with no shared state; p_shared remains null until the
+    // expression is promoted by make_xshared.
+    template <class D>
+    inline xsharable_expression<D>::xsharable_expression()
+        : p_shared(nullptr)
+    {
+    }
+
+    /**
+     * is_crtp_base_of<B, E>
+     * Resembles std::is_base_of, but addresses the problem of whether _some_ instantiation
+     * of a CRTP templated class B is a base of class E. A CRTP templated class is correctly
+     * templated with the most derived type in the CRTP hierarchy. Using this assumption,
+     * this implementation deals with either CRTP final classes (checks for inheritance
+     * with E as the CRTP parameter of B) or CRTP base classes (which are singly templated
+     * by the most derived class, and that's pulled out to use as a template parameter for B).
+     */
+
+    namespace detail
+    {
+        // General case: E itself is assumed to be the CRTP parameter of B.
+        template <template <class> class B, class E>
+        struct is_crtp_base_of_impl : std::is_base_of<B<E>, E>
+        {
+        };
+
+        // E is itself a template instantiation F<E'>: accept either B<E'> or
+        // B<F<E'>> as a base of F<E'>.
+        template <template <class> class B, class E, template <class> class F>
+        struct is_crtp_base_of_impl<B, F<E>>
+            : xtl::disjunction<std::is_base_of<B<E>, F<E>>, std::is_base_of<B<F<E>>, F<E>>>
+        {
+        };
+    }
+
+    template <template <class> class B, class E>
+    using is_crtp_base_of = detail::is_crtp_base_of_impl<B, std::decay_t<E>>;
+
+    // SFINAE helpers built on is_crtp_base_of: detect xexpression /
+    // xsharable_expression bases and gate overloads on assignability.
+    template <class E>
+    using is_xexpression = is_crtp_base_of<xexpression, E>;
+
+    template <class E, class R = void>
+    using enable_xexpression = typename std::enable_if<is_xexpression<E>::value, R>::type;
+
+    template <class E, class R = void>
+    using disable_xexpression = typename std::enable_if<!is_xexpression<E>::value, R>::type;
+
+    template <class... E>
+    using has_xexpression = xtl::disjunction<is_xexpression<E>...>;
+
+    template <class E>
+    using is_xsharable_expression = is_crtp_base_of<xsharable_expression, E>;
+
+    template <class E, class R = void>
+    using enable_xsharable_expression = typename std::enable_if<is_xsharable_expression<E>::value, R>::type;
+
+    template <class E, class R = void>
+    using disable_xsharable_expression = typename std::enable_if<!is_xsharable_expression<E>::value, R>::type;
+
+    // Left as a class template (not an alias) so that it can be specialized
+    // for particular LHS/RHS pairs if needed.
+    template <class LHS, class RHS>
+    struct can_assign : std::is_assignable<LHS, RHS>
+    {
+    };
+
+    template <class LHS, class RHS, class R = void>
+    using enable_assignable_expression = typename std::enable_if<can_assign<LHS, RHS>::value, R>::type;
+
+    template <class LHS, class RHS, class R = void>
+    using enable_not_assignable_expression = typename std::enable_if<!can_assign<LHS, RHS>::value, R>::type;
+
+    /***********************
+     * evaluation_strategy *
+     ***********************/
+
+    namespace detail
+    {
+        // Common tag base so is_evaluation_strategy can recognize any option.
+        struct option_base
+        {
+        };
+    }
+
+    namespace evaluation_strategy
+    {
+
+        struct immediate_type : xt::detail::option_base
+        {
+        };
+
+        // Tag value selecting eager (immediate) evaluation.
+        constexpr auto immediate = std::tuple<immediate_type>{};
+
+        struct lazy_type : xt::detail::option_base
+        {
+        };
+
+        // Tag value selecting lazy (expression-template) evaluation.
+        constexpr auto lazy = std::tuple<lazy_type>{};
+
+        /*
+        struct cached {};
+        */
+    }
+
+    // True when T (decayed) derives from option_base, i.e. is one of the
+    // evaluation-strategy tag types above.
+    template <class T>
+    struct is_evaluation_strategy : std::is_base_of<detail::option_base, std::decay_t<T>>
+    {
+    };
+
+    /************
+     * xclosure *
+     ************/
+
+    template <class T>
+    class xscalar;
+
+    // Closure computation: determines how an operand is stored inside an
+    // enclosing expression. Non-xexpression values are wrapped in xscalar,
+    // and xshared_expression is always stored by copy so shared ownership
+    // (rather than a potentially dangling reference) is captured.
+    template <class E, class EN = void>
+    struct xclosure
+    {
+        using type = xtl::closure_type_t<E>;
+    };
+
+    template <class E>
+    struct xclosure<xshared_expression<E>, std::enable_if_t<true>>
+    {
+        using type = xshared_expression<E>;  // force copy
+    };
+
+    template <class E>
+    struct xclosure<E, disable_xexpression<std::decay_t<E>>>
+    {
+        using type = xscalar<xtl::closure_type_t<E>>;
+    };
+
+    template <class E>
+    using xclosure_t = typename xclosure<E>::type;
+
+    // Same computation, but producing const closures.
+    template <class E, class EN = void>
+    struct const_xclosure
+    {
+        using type = xtl::const_closure_type_t<E>;
+    };
+
+    template <class E>
+    struct const_xclosure<E, disable_xexpression<std::decay_t<E>>>
+    {
+        using type = xscalar<xtl::const_closure_type_t<E>>;
+    };
+
+    template <class E>
+    struct const_xclosure<xshared_expression<E>&, std::enable_if_t<true>>
+    {
+        using type = xshared_expression<E>;  // force copy
+    };
+
+    template <class E>
+    using const_xclosure_t = typename const_xclosure<E>::type;
+
+    /*************************
+     * expression tag system *
+     *************************/
+
+    // Tag for plain tensor expressions (the default).
+    struct xtensor_expression_tag
+    {
+    };
+
+    // Tag for expressions carrying missing-value (optional) semantics.
+    struct xoptional_expression_tag
+    {
+    };
+
+    namespace extension
+    {
+        // Default: a type without a nested `expression_tag` typedef is treated
+        // as a plain tensor expression.
+        template <class E, class = void_t<int>>
+        struct get_expression_tag_impl
+        {
+            using type = xtensor_expression_tag;
+        };
+
+        // Detected case: use the type's own nested expression_tag.
+        template <class E>
+        struct get_expression_tag_impl<E, void_t<typename std::decay_t<E>::expression_tag>>
+        {
+            using type = typename std::decay_t<E>::expression_tag;
+        };
+
+        template <class E>
+        struct get_expression_tag : get_expression_tag_impl<E>
+        {
+        };
+
+        template <class E>
+        using get_expression_tag_t = typename get_expression_tag<E>::type;
+
+        // Folds the tags of several operands into one. xtensor_expression_tag
+        // acts as the identity element, so any non-default tag wins.
+        template <class... T>
+        struct expression_tag_and;
+
+        template <>
+        struct expression_tag_and<>
+        {
+            using type = xtensor_expression_tag;
+        };
+
+        template <class T>
+        struct expression_tag_and<T>
+        {
+            using type = T;
+        };
+
+        template <class T>
+        struct expression_tag_and<T, T>
+        {
+            using type = T;
+        };
+
+        template <class T>
+        struct expression_tag_and<xtensor_expression_tag, T>
+        {
+            using type = T;
+        };
+
+        template <class T>
+        struct expression_tag_and<T, xtensor_expression_tag> : expression_tag_and<xtensor_expression_tag, T>
+        {
+        };
+
+        template <>
+        struct expression_tag_and<xtensor_expression_tag, xtensor_expression_tag>
+        {
+            using type = xtensor_expression_tag;
+        };
+
+        template <class T1, class... T>
+        struct expression_tag_and<T1, T...> : expression_tag_and<T1, typename expression_tag_and<T...>::type>
+        {
+        };
+
+        template <class... T>
+        using expression_tag_and_t = typename expression_tag_and<T...>::type;
+
+        // Convenience base injecting the default tag into a class.
+        struct xtensor_empty_base
+        {
+            using expression_tag = xtensor_expression_tag;
+        };
+    }
+
+    // Tag of an expression combining operands T...: fold of the operands'
+    // tags, computed on their (decayed) const closures.
+    template <class... T>
+    struct xexpression_tag
+    {
+        using type = extension::expression_tag_and_t<
+            extension::get_expression_tag_t<std::decay_t<const_xclosure_t<T>>>...>;
+    };
+
+    template <class... T>
+    using xexpression_tag_t = typename xexpression_tag<T...>::type;
+
+    template <class E>
+    struct is_xtensor_expression : std::is_same<xexpression_tag_t<E>, xtensor_expression_tag>
+    {
+    };
+
+    template <class E>
+    struct is_xoptional_expression : std::is_same<xexpression_tag_t<E>, xoptional_expression_tag>
+    {
+    };
+
+    /********************************
+     * xoptional_comparable concept *
+     ********************************/
+
+    // All E... are either plain tensor or optional expressions, i.e. their
+    // values can be meaningfully compared across the optional boundary.
+    template <class... E>
+    struct xoptional_comparable
+        : xtl::conjunction<xtl::disjunction<is_xtensor_expression<E>, is_xoptional_expression<E>>...>
+    {
+    };
+
+// Helper macros generating thin members of xshared_expression that forward
+// to the wrapped expression through m_ptr.
+#define XTENSOR_FORWARD_CONST_METHOD(name)                                   \
+    auto name() const -> decltype(std::declval<xtl::constify_t<E>>().name()) \
+    {                                                                        \
+        return m_ptr->name();                                                \
+    }
+
+#define XTENSOR_FORWARD_METHOD(name)                  \
+    auto name() -> decltype(std::declval<E>().name()) \
+    {                                                 \
+        return m_ptr->name();                         \
+    }
+
+// Iterator factories. The shape-taking overloads must forward `shape` to the
+// wrapped expression: the declared return type is computed from
+// name<L>(shape), and dropping the argument would iterate over the
+// expression's own shape instead of the requested broadcast shape (and could
+// mismatch the declared return type on instantiation).
+#define XTENSOR_FORWARD_CONST_ITERATOR_METHOD(name)                                               \
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>                                          \
+    auto name() const noexcept -> decltype(std::declval<xtl::constify_t<E>>().template name<L>()) \
+    {                                                                                             \
+        return m_ptr->template name<L>();                                                         \
+    }                                                                                             \
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>                                 \
+    auto name(const S& shape) const noexcept                                                      \
+        -> decltype(std::declval<xtl::constify_t<E>>().template name<L>(shape))                   \
+    {                                                                                             \
+        return m_ptr->template name<L>(shape);                                                    \
+    }
+
+#define XTENSOR_FORWARD_ITERATOR_METHOD(name)                                                 \
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>                             \
+    auto name(const S& shape) noexcept -> decltype(std::declval<E>().template name<L>(shape)) \
+    {                                                                                         \
+        return m_ptr->template name<L>(shape);                                                \
+    }                                                                                         \
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>                                      \
+    auto name() noexcept -> decltype(std::declval<E>().template name<L>())                    \
+    {                                                                                         \
+        return m_ptr->template name<L>();                                                     \
+    }
+
+    namespace detail
+    {
+        // Lazy extractors for nested typedefs of E; used below with
+        // xtl::mpl::eval_if_t so the typedef is only accessed when the
+        // corresponding capability (strides / storage) was detected.
+        template <class E>
+        struct expr_strides_type
+        {
+            using type = typename E::strides_type;
+        };
+
+        template <class E>
+        struct expr_inner_strides_type
+        {
+            using type = typename E::inner_strides_type;
+        };
+
+        template <class E>
+        struct expr_backstrides_type
+        {
+            using type = typename E::backstrides_type;
+        };
+
+        template <class E>
+        struct expr_inner_backstrides_type
+        {
+            using type = typename E::inner_backstrides_type;
+        };
+
+        template <class E>
+        struct expr_storage_type
+        {
+            using type = typename E::storage_type;
+        };
+    }
+
+    /**
+     * @class xshared_expression
+     * @brief Shared xexpressions
+     *
+     * Due to C++ lifetime constraints it's sometimes necessary to create shared
+     * expressions (akin to a shared pointer).
+     *
+     * For example, when a temporary expression needs to be used twice in another
+     * expression, shared expressions can come to the rescue:
+     *
+     * @code{.cpp}
+     * template <class E>
+     * auto cos_plus_sin(xexpression<E>&& expr)
+     * {
+     *     // THIS IS WRONG: forwarding rvalue twice not permitted!
+     *     // return xt::sin(std::forward<E>(expr)) + xt::cos(std::forward<E>(expr));
+     *     // THIS IS WRONG TOO: because second `expr` is taken as reference (which will be invalid)
+     *     // return xt::sin(std::forward<E>(expr)) + xt::cos(expr)
+     *     auto shared_expr = xt::make_xshared(std::forward<E>(expr));
+     *     auto result = xt::sin(shared_expr) + xt::cos(shared_expr);
+     *     std::cout << shared_expr.use_count() << std::endl; // Will print 3 because used twice in expression
+     *     return result; // all valid because expr lifetime managed by xshared_expression / shared_ptr.
+     * }
+     * @endcode
+     */
+    template <class E>
+    class xshared_expression : public xexpression<xshared_expression<E>>
+    {
+    public:
+
+        using base_class = xexpression<xshared_expression<E>>;
+
+        // The wrapper re-exports the wrapped expression's full type interface.
+        using value_type = typename E::value_type;
+        using reference = typename E::reference;
+        using const_reference = typename E::const_reference;
+        using pointer = typename E::pointer;
+        using const_pointer = typename E::const_pointer;
+        using size_type = typename E::size_type;
+        using difference_type = typename E::difference_type;
+
+        using inner_shape_type = typename E::inner_shape_type;
+        using shape_type = typename E::shape_type;
+
+        // Strides/storage typedefs are taken from E when present; otherwise a
+        // fallback is derived from the shape (eval_if avoids instantiating
+        // the missing typedef).
+        using strides_type = xtl::mpl::
+            eval_if_t<has_strides<E>, detail::expr_strides_type<E>, get_strides_type<shape_type>>;
+        using backstrides_type = xtl::mpl::
+            eval_if_t<has_strides<E>, detail::expr_backstrides_type<E>, get_strides_type<shape_type>>;
+        using inner_strides_type = xtl::mpl::
+            eval_if_t<has_strides<E>, detail::expr_inner_strides_type<E>, get_strides_type<shape_type>>;
+        using inner_backstrides_type = xtl::mpl::
+            eval_if_t<has_strides<E>, detail::expr_inner_backstrides_type<E>, get_strides_type<shape_type>>;
+        using storage_type = xtl::mpl::eval_if_t<has_storage_type<E>, detail::expr_storage_type<E>, make_invalid_type<>>;
+
+        using stepper = typename E::stepper;
+        using const_stepper = typename E::const_stepper;
+
+        using linear_iterator = typename E::linear_iterator;
+        using const_linear_iterator = typename E::const_linear_iterator;
+
+        using bool_load_type = typename E::bool_load_type;
+
+        static constexpr layout_type static_layout = E::static_layout;
+        static constexpr bool contiguous_layout = static_layout != layout_type::dynamic;
+
+        explicit xshared_expression(const std::shared_ptr<E>& ptr);
+        long use_count() const noexcept;
+
+        // Element access forwarded to the wrapped expression.
+        template <class... Args>
+        auto operator()(Args... args) -> decltype(std::declval<E>()(args...))
+        {
+            return m_ptr->operator()(args...);
+        }
+
+        XTENSOR_FORWARD_CONST_METHOD(shape)
+        XTENSOR_FORWARD_CONST_METHOD(dimension)
+        XTENSOR_FORWARD_CONST_METHOD(size)
+        XTENSOR_FORWARD_CONST_METHOD(layout)
+        XTENSOR_FORWARD_CONST_METHOD(is_contiguous)
+
+        XTENSOR_FORWARD_ITERATOR_METHOD(begin)
+        XTENSOR_FORWARD_ITERATOR_METHOD(end)
+        XTENSOR_FORWARD_CONST_ITERATOR_METHOD(begin)
+        XTENSOR_FORWARD_CONST_ITERATOR_METHOD(end)
+        XTENSOR_FORWARD_CONST_ITERATOR_METHOD(cbegin)
+        XTENSOR_FORWARD_CONST_ITERATOR_METHOD(cend)
+
+        XTENSOR_FORWARD_ITERATOR_METHOD(rbegin)
+        XTENSOR_FORWARD_ITERATOR_METHOD(rend)
+        XTENSOR_FORWARD_CONST_ITERATOR_METHOD(rbegin)
+        XTENSOR_FORWARD_CONST_ITERATOR_METHOD(rend)
+        XTENSOR_FORWARD_CONST_ITERATOR_METHOD(crbegin)
+        XTENSOR_FORWARD_CONST_ITERATOR_METHOD(crend)
+
+        XTENSOR_FORWARD_METHOD(linear_begin)
+        XTENSOR_FORWARD_METHOD(linear_end)
+        XTENSOR_FORWARD_CONST_METHOD(linear_begin)
+        XTENSOR_FORWARD_CONST_METHOD(linear_end)
+        XTENSOR_FORWARD_CONST_METHOD(linear_cbegin)
+        XTENSOR_FORWARD_CONST_METHOD(linear_cend)
+
+        XTENSOR_FORWARD_METHOD(linear_rbegin)
+        XTENSOR_FORWARD_METHOD(linear_rend)
+        XTENSOR_FORWARD_CONST_METHOD(linear_rbegin)
+        XTENSOR_FORWARD_CONST_METHOD(linear_rend)
+        XTENSOR_FORWARD_CONST_METHOD(linear_crbegin)
+        XTENSOR_FORWARD_CONST_METHOD(linear_crend)
+
+        template <class T = E>
+        std::enable_if_t<has_strides<T>::value, const inner_strides_type&> strides() const
+        {
+            return m_ptr->strides();
+        }
+
+        // NOTE(review): declared return type uses inner_strides_type rather
+        // than inner_backstrides_type — verify against the wrapped type.
+        template <class T = E>
+        std::enable_if_t<has_strides<T>::value, const inner_strides_type&> backstrides() const
+        {
+            return m_ptr->backstrides();
+        }
+
+        template <class T = E>
+        std::enable_if_t<has_data_interface<T>::value, pointer> data() noexcept
+        {
+            return m_ptr->data();
+        }
+
+        // NOTE(review): the const overload also returns `pointer`, not
+        // const_pointer — confirm this is intended.
+        template <class T = E>
+        std::enable_if_t<has_data_interface<T>::value, pointer> data() const noexcept
+        {
+            return m_ptr->data();
+        }
+
+        template <class T = E>
+        std::enable_if_t<has_data_interface<T>::value, size_type> data_offset() const noexcept
+        {
+            return m_ptr->data_offset();
+        }
+
+        template <class T = E>
+        std::enable_if_t<has_data_interface<T>::value, typename T::storage_type&> storage() noexcept
+        {
+            return m_ptr->storage();
+        }
+
+        template <class T = E>
+        std::enable_if_t<has_data_interface<T>::value, const typename T::storage_type&> storage() const noexcept
+        {
+            return m_ptr->storage();
+        }
+
+        template <class It>
+        reference element(It first, It last)
+        {
+            return m_ptr->element(first, last);
+        }
+
+        template <class It>
+        const_reference element(It first, It last) const
+        {
+            return m_ptr->element(first, last);
+        }
+
+        template <class S>
+        bool broadcast_shape(S& shape, bool reuse_cache = false) const
+        {
+            return m_ptr->broadcast_shape(shape, reuse_cache);
+        }
+
+        template <class S>
+        bool has_linear_assign(const S& strides) const noexcept
+        {
+            return m_ptr->has_linear_assign(strides);
+        }
+
+        template <class S>
+        auto stepper_begin(const S& shape) noexcept -> decltype(std::declval<E>().stepper_begin(shape))
+        {
+            return m_ptr->stepper_begin(shape);
+        }
+
+        template <class S>
+        auto stepper_end(const S& shape, layout_type l) noexcept
+            -> decltype(std::declval<E>().stepper_end(shape, l))
+        {
+            return m_ptr->stepper_end(shape, l);
+        }
+
+        // Const stepper factories: the cast to const E* selects the wrapped
+        // expression's const overloads.
+        template <class S>
+        auto stepper_begin(const S& shape) const noexcept
+            -> decltype(std::declval<const E>().stepper_begin(shape))
+        {
+            return static_cast<const E*>(m_ptr.get())->stepper_begin(shape);
+        }
+
+        template <class S>
+        auto stepper_end(const S& shape, layout_type l) const noexcept
+            -> decltype(std::declval<const E>().stepper_end(shape, l))
+        {
+            return static_cast<const E*>(m_ptr.get())->stepper_end(shape, l);
+        }
+
+    private:
+
+        // Shared ownership of the wrapped expression.
+        std::shared_ptr<E> m_ptr;
+    };
+
+    /**
+     * Constructor for xshared expression (note: usually the free function
+     * `make_xshared` is recommended).
+     *
+     * @param ptr shared ptr that contains the expression
+     * @sa make_xshared
+     */
+    // Stores the shared_ptr, sharing ownership of the wrapped expression.
+    template <class E>
+    inline xshared_expression<E>::xshared_expression(const std::shared_ptr<E>& ptr)
+        : m_ptr(ptr)
+    {
+    }
+
+    /**
+     * Return the number of times this expression is referenced.
+     * Internally calls the use_count() function of the std::shared_ptr.
+     */
+    template <class E>
+    inline long xshared_expression<E>::use_count() const noexcept
+    {
+        return m_ptr.use_count();
+    }
+
+    namespace detail
+    {
+        template <class E>
+        inline xshared_expression<E> make_xshared_impl(xsharable_expression<E>&& expr)
+        {
+            // First promotion materializes the expression into a
+            // heap-allocated shared copy; later calls on the same object
+            // reuse the cached shared_ptr.
+            if (expr.p_shared == nullptr)
+            {
+                expr.p_shared = std::make_shared<E>(std::move(expr).derived_cast());
+            }
+            return xshared_expression<E>(expr.p_shared);
+        }
+    }
+
+    /**
+     * Helper function to create shared expression from any xexpression
+     *
+     * @param expr rvalue expression that will be shared
+     * @return xshared expression
+     */
+    template <class E>
+    inline xshared_expression<E> make_xshared(xexpression<E>&& expr)
+    {
+        // Only expressions deriving from xsharable_expression carry the
+        // cached shared_ptr required for promotion.
+        static_assert(
+            is_xsharable_expression<E>::value,
+            "make_shared requires E to inherit from xsharable_expression"
+        );
+        return detail::make_xshared_impl(std::move(expr.derived_cast()));
+    }
+
+    /**
+     * Helper function to create a shared expression from an lvalue
+     * xexpression.
+     *
+     * NOTE(review): despite taking an lvalue reference, this overload moves
+     * from @p expr — the caller's object is left in a moved-from state and
+     * must not be used afterwards.
+     *
+     * @param expr expression whose contents are transferred into the
+     *        returned shared expression
+     * @return xshared expression
+     * @sa make_xshared
+     */
+    template <class E>
+    inline auto share(xexpression<E>& expr)
+    {
+        return make_xshared(std::move(expr));
+    }
+
+    /**
+     * Helper function to create a shared expression from any xexpression
+     * (overload for temporaries).
+     *
+     * @param expr rvalue expression that will be shared
+     * @return xshared expression
+     * @sa make_xshared
+     */
+    template <class E>
+    inline auto share(xexpression<E>&& expr)
+    {
+        return make_xshared(std::move(expr));
+    }
+
+// NOTE(review): only XTENSOR_FORWARD_METHOD is scrubbed here; the
+// *_CONST_METHOD and *_ITERATOR_METHOD helpers defined above remain visible
+// to including translation units — confirm whether other headers rely on
+// them before undefining the rest.
+#undef XTENSOR_FORWARD_METHOD
+
+}
+
+#endif

+ 273 - 0
3rd/numpy/include/xtensor/xexpression_holder.hpp

@@ -0,0 +1,273 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_XEXPRESSION_HOLDER_HPP
+#define XTENSOR_XEXPRESSION_HOLDER_HPP
+
+#include <memory>
+
+#include <nlohmann/json.hpp>
+
+#include "xarray.hpp"
+#include "xjson.hpp"
+#include "xtensor_config.hpp"
+#include "xtl/xany.hpp"
+
+namespace xt
+{
+
+    namespace detail
+    {
+        // Forward declarations of the type-erasure machinery defined below.
+        class xexpression_holder_impl;
+
+        template <class CTE>
+        class xexpression_wrapper;
+    }
+
+    // Type-erasing, copyable owner of an arbitrary xexpression, with JSON
+    // (de)serialization through the nlohmann::json to_json/from_json hooks.
+    class xexpression_holder  // Value semantic
+    {
+    public:
+
+        using implementation_type = detail::xexpression_holder_impl;
+
+        xexpression_holder() = default;
+
+        // Wraps any expression; E is deduced from the forwarding reference.
+        template <class E>
+        xexpression_holder(E&& expr);
+
+        // Takes ownership of the raw implementation pointer.
+        xexpression_holder(implementation_type* holder);
+        xexpression_holder(const xexpression_holder& holder);
+        xexpression_holder(xexpression_holder&& holder);
+
+        xexpression_holder& operator=(const xexpression_holder&);
+        xexpression_holder& operator=(xexpression_holder&&);
+
+        void swap(xexpression_holder&);
+
+        void to_json(nlohmann::json&) const;
+        void from_json(const nlohmann::json&);
+
+    private:
+
+        void init_pointer_from_json(const nlohmann::json&);
+        void check_holder() const;
+
+        // Owning pointer; nullptr when the holder is empty (default-constructed).
+        std::unique_ptr<implementation_type> p_holder;
+    };
+
+    /*************************************
+     * to_json and from_json declaration *
+     *************************************/
+
+    /// @cond DOXYGEN_INCLUDE_SFINAE
+    // ADL serialization hooks picked up by nlohmann::json for xexpression_holder.
+    void to_json(nlohmann::json& j, const xexpression_holder& o);
+    void from_json(const nlohmann::json& j, xexpression_holder& o);
+
+    /// @endcond
+
+    namespace detail
+    {
+        // Abstract base of the type-erased storage: clonable (deep copy) and
+        // JSON (de)serializable. Move operations are deleted; copying is only
+        // available to derived classes through the protected copy constructor.
+        class xexpression_holder_impl  // Entity semantic
+        {
+        public:
+
+            xexpression_holder_impl(xexpression_holder_impl&&) = delete;
+
+            xexpression_holder_impl& operator=(const xexpression_holder_impl&) = delete;
+            xexpression_holder_impl& operator=(xexpression_holder_impl&&) = delete;
+
+            virtual xexpression_holder_impl* clone() const = 0;
+            virtual void to_json(nlohmann::json&) const = 0;
+            virtual void from_json(const nlohmann::json&) = 0;
+            virtual ~xexpression_holder_impl() = default;
+
+        protected:
+
+            xexpression_holder_impl() = default;
+            xexpression_holder_impl(const xexpression_holder_impl&) = default;
+        };
+
+        // Concrete wrapper storing the expression closure type CTE.
+        template <class CTE>
+        class xexpression_wrapper : public xexpression_holder_impl
+        {
+        public:
+
+            template <class E>
+            xexpression_wrapper(E&& expr);
+
+            // `override` added so signature drift is caught at compile time;
+            // clone uses a covariant return type.
+            xexpression_wrapper* clone() const override;
+
+            void to_json(nlohmann::json&) const override;
+            void from_json(const nlohmann::json&) override;
+
+            ~xexpression_wrapper() = default;
+
+        protected:
+
+            xexpression_wrapper(const xexpression_wrapper&);
+
+        private:
+
+            CTE m_expression;
+        };
+    }
+
+    // Wraps expr in a type-erased holder. NOTE(review): E is deduced from a
+    // forwarding reference, so passing an lvalue deduces E as a reference type
+    // and the wrapper then stores a reference to the caller's expression (the
+    // caller must keep it alive); passing an rvalue moves the expression in.
+    template <class E>
+    inline xexpression_holder::xexpression_holder(E&& expr)
+        : p_holder(new detail::xexpression_wrapper<E>(std::forward<E>(expr)))
+    {
+    }
+
+    // Takes ownership of an already-built implementation (may be nullptr).
+    inline xexpression_holder::xexpression_holder(implementation_type* holder)
+        : p_holder(holder)
+    {
+    }
+
+    // Deep copy via the implementation's clone(). Guarded against an empty
+    // source: the previous version dereferenced p_holder unconditionally, so
+    // copying a default-constructed holder was undefined behavior.
+    inline xexpression_holder::xexpression_holder(const xexpression_holder& holder)
+        : p_holder(holder.p_holder ? holder.p_holder->clone() : nullptr)
+    {
+    }
+
+    // Move construction: steals the implementation, leaving holder empty.
+    inline xexpression_holder::xexpression_holder(xexpression_holder&& holder)
+        : p_holder(std::move(holder.p_holder))
+    {
+    }
+
+    // Copy-and-swap: build a copy, then exchange its contents with *this.
+    // Self-assignment safe.
+    inline xexpression_holder& xexpression_holder::operator=(const xexpression_holder& holder)
+    {
+        xexpression_holder(holder).swap(*this);
+        return *this;
+    }
+
+    // Move assignment implemented as a swap; the moved-from holder receives
+    // the previous contents of *this and releases them on destruction.
+    inline xexpression_holder& xexpression_holder::operator=(xexpression_holder&& holder)
+    {
+        swap(holder);
+        return *this;
+    }
+
+    // Exchanges ownership of the two implementation pointers; never throws.
+    inline void xexpression_holder::swap(xexpression_holder& holder)
+    {
+        p_holder.swap(holder.p_holder);
+    }
+
+    // Serializes the held expression into j; an empty holder leaves j untouched.
+    inline void xexpression_holder::to_json(nlohmann::json& j) const
+    {
+        if (p_holder != nullptr)
+        {
+            p_holder->to_json(j);
+        }
+    }
+
+    // Deserializes j into the held expression. Tensors are expected to be
+    // serialized as (nested) JSON arrays; anything else is rejected.
+    inline void xexpression_holder::from_json(const nlohmann::json& j)
+    {
+        if (!j.is_array())
+        {
+            XTENSOR_THROW(std::runtime_error, "Received a JSON that does not contain a tensor");
+        }
+
+        // Lazily allocate a wrapper whose element type is discovered from the
+        // first scalar of the JSON payload.
+        if (p_holder == nullptr)
+        {
+            init_pointer_from_json(j);
+        }
+        p_holder->from_json(j);
+    }
+
+    // Allocates an empty wrapper whose element type (double / bool / string)
+    // matches the first scalar found in the JSON payload.
+    //
+    // BUGFIX: the previous version fell through to XTENSOR_THROW even after a
+    // successful p_holder.reset(), so deserialization of supported types always
+    // threw "unsupported data type". Each branch now returns once initialized.
+    inline void xexpression_holder::init_pointer_from_json(const nlohmann::json& j)
+    {
+        // Walk down nested arrays to reach the first scalar element.
+        if (j.is_array())
+        {
+            return init_pointer_from_json(j[0]);
+        }
+
+        if (j.is_number())
+        {
+            xt::xarray<double> empty_arr;
+            p_holder.reset(new detail::xexpression_wrapper<xt::xarray<double>>(std::move(empty_arr)));
+            return;
+        }
+
+        if (j.is_boolean())
+        {
+            xt::xarray<bool> empty_arr;
+            p_holder.reset(new detail::xexpression_wrapper<xt::xarray<bool>>(std::move(empty_arr)));
+            return;
+        }
+
+        if (j.is_string())
+        {
+            xt::xarray<std::string> empty_arr;
+            p_holder.reset(new detail::xexpression_wrapper<xt::xarray<std::string>>(std::move(empty_arr)));
+            return;
+        }
+
+        XTENSOR_THROW(std::runtime_error, "Received a JSON with a tensor that contains unsupported data type");
+    }
+
+    // Throws if the holder is empty; guard for operations that require an
+    // initialized expression.
+    inline void xexpression_holder::check_holder() const
+    {
+        if (p_holder == nullptr)
+        {
+            XTENSOR_THROW(std::runtime_error, "The holder does not contain an expression");
+        }
+    }
+
+    /****************************************
+     * to_json and from_json implementation *
+     ****************************************/
+
+    /// @cond DOXYGEN_INCLUDE_SFINAE
+    // Free-function hooks found by nlohmann::json via ADL; they simply forward
+    // to the member implementations.
+    inline void to_json(nlohmann::json& j, const xexpression_holder& o)
+    {
+        o.to_json(j);
+    }
+
+    inline void from_json(const nlohmann::json& j, xexpression_holder& o)
+    {
+        o.from_json(j);
+    }
+
+    /// @endcond
+
+    namespace detail
+    {
+        // Stores the expression (moved or copied depending on E's value category).
+        template <class CTE>
+        template <class E>
+        inline xexpression_wrapper<CTE>::xexpression_wrapper(E&& expr)
+            : xexpression_holder_impl()
+            , m_expression(std::forward<E>(expr))
+        {
+        }
+
+        // Deep copy through the protected copy constructor (covariant return).
+        template <class CTE>
+        inline xexpression_wrapper<CTE>* xexpression_wrapper<CTE>::clone() const
+        {
+            return new xexpression_wrapper<CTE>(*this);
+        }
+
+        // Forwards to xt's JSON serialization for the stored expression.
+        template <class CTE>
+        inline void xexpression_wrapper<CTE>::to_json(nlohmann::json& j) const
+        {
+            ::xt::to_json(j, m_expression);
+        }
+
+        // Deserializes in place, mutating the stored expression.
+        template <class CTE>
+        inline void xexpression_wrapper<CTE>::from_json(const nlohmann::json& j)
+        {
+            ::xt::from_json(j, m_expression);
+        }
+
+        template <class CTE>
+        inline xexpression_wrapper<CTE>::xexpression_wrapper(const xexpression_wrapper& wrapper)
+            : xexpression_holder_impl()
+            , m_expression(wrapper.m_expression)
+        {
+        }
+    }
+}
+
+#endif

+ 198 - 0
3rd/numpy/include/xtensor/xexpression_traits.hpp

@@ -0,0 +1,198 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_EXPRESSION_TRAITS_HPP
+#define XTENSOR_EXPRESSION_TRAITS_HPP
+
+#include "xexpression.hpp"
+
+namespace xt
+{
+    /***************
+     * xvalue_type *
+     ***************/
+
+    namespace detail
+    {
+        // Primary template: non-expression types (e.g. scalars) map to themselves.
+        template <class E, class enable = void>
+        struct xvalue_type_impl
+        {
+            using type = E;
+        };
+
+        // SFINAE specialization: xexpressions map to their nested value_type.
+        template <class E>
+        struct xvalue_type_impl<E, std::enable_if_t<is_xexpression<E>::value>>
+        {
+            using type = typename E::value_type;
+        };
+    }
+
+    template <class E>
+    using xvalue_type = detail::xvalue_type_impl<E>;
+
+    template <class E>
+    using xvalue_type_t = typename xvalue_type<E>::type;
+
+    /*********************
+     * common_value_type *
+     *********************/
+
+    // Common value_type of several expression types (after decay), computed
+    // with std::common_type.
+    template <class... C>
+    struct common_value_type
+    {
+        using type = std::common_type_t<typename std::decay_t<C>::value_type...>;
+    };
+
+    template <class... C>
+    using common_value_type_t = typename common_value_type<C...>::type;
+
+    /********************
+     * common_size_type *
+     ********************/
+
+    // Common size_type of several containers; an empty pack defaults to
+    // std::size_t.
+    template <class... Args>
+    struct common_size_type
+    {
+        using type = std::common_type_t<typename Args::size_type...>;
+    };
+
+    template <>
+    struct common_size_type<>
+    {
+        using type = std::size_t;
+    };
+
+    template <class... Args>
+    using common_size_type_t = typename common_size_type<Args...>::type;
+
+    /**************************
+     * common_difference type *
+     **************************/
+
+    // Common difference_type of several containers; an empty pack defaults to
+    // std::ptrdiff_t.
+    template <class... Args>
+    struct common_difference_type
+    {
+        using type = std::common_type_t<typename Args::difference_type...>;
+    };
+
+    template <>
+    struct common_difference_type<>
+    {
+        using type = std::ptrdiff_t;
+    };
+
+    template <class... Args>
+    using common_difference_type_t = typename common_difference_type<Args...>::type;
+
+    /******************
+     * temporary_type *
+     ******************/
+
+    namespace detail
+    {
+        // Maps a shape type to the container able to hold it. Primary template:
+        // dynamically-sized shapes yield xarray.
+        template <class S>
+        struct xtype_for_shape
+        {
+            template <class T, layout_type L>
+            using type = xarray<T, L>;
+        };
+
+#if defined(__GNUC__) && (__GNUC__ > 6)
+#if __cplusplus == 201703L
+        // NOTE(review): extra specialization matching shape containers with a
+        // <class, size_t, class, bool> signature; guarded GCC/C++17 workaround,
+        // presumably for a deduction quirk -- confirm before changing.
+        template <template <class, std::size_t, class, bool> class S, class X, std::size_t N, class A, bool Init>
+        struct xtype_for_shape<S<X, N, A, Init>>
+        {
+            template <class T, layout_type L>
+            using type = xarray<T, L>;
+        };
+#endif  // __cplusplus == 201703L
+#endif  // __GNUC__ && (__GNUC__ > 6)
+
+        // std::array-like shapes (fixed rank N) yield xtensor<T, N>.
+        template <template <class, std::size_t> class S, class X, std::size_t N>
+        struct xtype_for_shape<S<X, N>>
+        {
+            template <class T, layout_type L>
+            using type = xtensor<T, N, L>;
+        };
+
+        // Shapes carrying compile-time extents yield xtensor_fixed.
+        template <template <std::size_t...> class S, std::size_t... X>
+        struct xtype_for_shape<S<X...>>
+        {
+            template <class T, layout_type L>
+            using type = xtensor_fixed<T, xshape<X...>, L>;
+        };
+    }
+
+    template <class Tag, class T>
+    struct temporary_type_from_tag;
+
+    // For plain tensor expressions, the temporary is derived from the
+    // expression's shape and value_type with the default layout.
+    template <class T>
+    struct temporary_type_from_tag<xtensor_expression_tag, T>
+    {
+        using I = std::decay_t<T>;
+        using shape_type = typename I::shape_type;
+        using value_type = typename I::value_type;
+        static constexpr layout_type static_layout = XTENSOR_DEFAULT_LAYOUT;
+        using type = typename detail::xtype_for_shape<shape_type>::template type<value_type, static_layout>;
+    };
+
+    // Evaluated temporary for an expression: the specialization below prefers
+    // the expression's own nested temporary_type when it exists (void_t
+    // detection); otherwise one is derived from the expression tag.
+    template <class T, class = void>
+    struct temporary_type
+    {
+        using type = typename temporary_type_from_tag<xexpression_tag_t<T>, T>::type;
+    };
+
+    template <class T>
+    struct temporary_type<T, void_t<typename std::decay_t<T>::temporary_type>>
+    {
+        using type = typename std::decay_t<T>::temporary_type;
+    };
+
+    template <class T>
+    using temporary_type_t = typename temporary_type<T>::type;
+
+    /**********************
+     * common_tensor_type *
+     **********************/
+
+    namespace detail
+    {
+        // Promotes layout, value_type and shape over all of C...; the result is
+        // the container type able to hold the combination of the inputs.
+        template <class... C>
+        struct common_tensor_type_impl
+        {
+            static constexpr layout_type static_layout = compute_layout(std::decay_t<C>::static_layout...);
+            using value_type = common_value_type_t<C...>;
+            using shape_type = promote_shape_t<typename C::shape_type...>;
+            using type = typename xtype_for_shape<shape_type>::template type<value_type, static_layout>;
+        };
+    }
+
+    template <class... C>
+    struct common_tensor_type : detail::common_tensor_type_impl<std::decay_t<C>...>
+    {
+    };
+
+    template <class... C>
+    using common_tensor_type_t = typename common_tensor_type<C...>::type;
+
+    /**************************
+     * big_promote_value_type *
+     **************************/
+
+    // Promotes an expression's value_type to a wider type of the same kind via
+    // xtl::big_promote_type (e.g. to reduce overflow risk in accumulations).
+    template <class E>
+    struct big_promote_value_type
+    {
+        using type = xtl::big_promote_type_t<typename std::decay_t<E>::value_type>;
+    };
+
+    template <class E>
+    using big_promote_value_type_t = typename big_promote_value_type<E>::type;
+}
+
+#endif

+ 991 - 0
3rd/numpy/include/xtensor/xfixed.hpp

@@ -0,0 +1,991 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_FIXED_HPP
+#define XTENSOR_FIXED_HPP
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <utility>
+#include <vector>
+
+#include <xtl/xsequence.hpp>
+
+#include "xcontainer.hpp"
+#include "xsemantic.hpp"
+#include "xstorage.hpp"
+#include "xstrides.hpp"
+#include "xtensor_config.hpp"
+
+namespace xtl
+{
+    namespace detail
+    {
+        // xt::const_array has a compile-time fixed size, so this specialization
+        // ignores the requested runtime size and fill value and returns a
+        // default-constructed sequence.
+        template <class T, std::size_t N>
+        struct sequence_builder<xt::const_array<T, N>>
+        {
+            using sequence_type = xt::const_array<T, N>;
+            using value_type = typename sequence_type::value_type;
+            using size_type = typename sequence_type::size_type;
+
+            inline static sequence_type make(size_type /*size*/, value_type /*v*/)
+            {
+                return sequence_type();
+            }
+        };
+    }
+}
+
+namespace xt
+{
+
+    /**********************
+     * xfixed declaration *
+     **********************/
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    class xfixed_container;
+
+    namespace detail
+    {
+        /**************************************************************************************
+           The following is something we can currently only dream about -- for when we drop
+           support for a lot of the old compilers (e.g. GCC 4.9, MSVC 2017 ;)
+
+        template <class T>
+        constexpr std::size_t calculate_stride(T& shape, std::size_t idx, layout_type L)
+        {
+            if (shape[idx] == 1)
+            {
+                return std::size_t(0);
+            }
+
+            std::size_t data_size = 1;
+            std::size_t stride = 1;
+            if (L == layout_type::row_major)
+            {
+                // because we have a integer sequence that counts
+                // from 0 to sz - 1, we need to "invert" idx here
+                idx = shape.size() - idx;
+                for (std::size_t i = idx; i != 0; --i)
+                {
+                    stride = data_size;
+                    data_size = stride * shape[i - 1];
+                }
+            }
+            else
+            {
+                for (std::size_t i = 0; i < idx + 1; ++i)
+                {
+                    stride = data_size;
+                    data_size = stride * shape[i];
+                }
+            }
+            return stride;
+        }
+
+        *****************************************************************************************/
+
+        // Compile-time stride of axis I for the extents X... under layout L.
+        template <layout_type L, std::size_t I, std::size_t... X>
+        struct calculate_stride;
+
+        // Column-major: the stride of axis I is the product of the extents of
+        // the preceding axes; the first axis is contiguous (stride 1).
+        template <std::size_t I, std::size_t Y, std::size_t... X>
+        struct calculate_stride<layout_type::column_major, I, Y, X...>
+        {
+            static constexpr std::ptrdiff_t value = Y
+                                                    * calculate_stride<layout_type::column_major, I - 1, X...>::value;
+        };
+
+        template <std::size_t Y, std::size_t... X>
+        struct calculate_stride<layout_type::column_major, 0, Y, X...>
+        {
+            static constexpr std::ptrdiff_t value = 1;
+        };
+
+        // Row-major helper, counting axes from the last one: the stride is the
+        // product of the extents of the following axes; the last axis is
+        // contiguous (stride 1).
+        template <std::size_t I, std::size_t... X>
+        struct calculate_stride_row_major
+        {
+            static constexpr std::ptrdiff_t value = at<sizeof...(X) - I, X...>::value
+                                                    * calculate_stride_row_major<I - 1, X...>::value;
+        };
+
+        template <std::size_t... X>
+        struct calculate_stride_row_major<0, X...>
+        {
+            static constexpr std::ptrdiff_t value = 1;
+        };
+
+        template <std::size_t I, std::size_t... X>
+        struct calculate_stride<layout_type::row_major, I, X...>
+        {
+            static constexpr std::ptrdiff_t value = calculate_stride_row_major<sizeof...(X) - I - 1, X...>::value;
+        };
+
+        namespace workaround
+        {
+            // Indirection used on MSVC (see get_strides_impl below) so the
+            // stride computation is only instantiated through a function call.
+            template <layout_type L, size_t I, class SEQ>
+            struct computed_strides;
+
+            template <layout_type L, size_t I, size_t... X>
+            struct computed_strides<L, I, std::index_sequence<X...>>
+            {
+                static constexpr std::ptrdiff_t value = calculate_stride<L, I, X...>::value;
+            };
+
+            template <layout_type L, size_t I, class SEQ>
+            constexpr std::ptrdiff_t get_computed_strides(bool cond)
+            {
+                return cond ? 0 : computed_strides<L, I, SEQ>::value;
+            }
+        }
+
+        // Builds the strides container R for a fixed shape; axes of extent 1
+        // get a stride of 0, so indexing along them never advances the data.
+        template <layout_type L, class R, std::size_t... X, std::size_t... I>
+        constexpr R get_strides_impl(const xt::fixed_shape<X...>& shape, std::index_sequence<I...>)
+        {
+            static_assert(
+                (L == layout_type::row_major) || (L == layout_type::column_major),
+                "Layout not supported for fixed array"
+            );
+#if (_MSC_VER >= 1910)
+            using temp_type = std::index_sequence<X...>;
+            return R({workaround::get_computed_strides<L, I, temp_type>(shape[I] == 1)...});
+#else
+            return R({shape[I] == 1 ? 0 : calculate_stride<L, I, X...>::value...});
+#endif
+        }
+
+        // backstride(I) = stride(I) * (shape[I] - 1): the offset from the first
+        // to the last element along axis I.
+        template <class S, class T, std::size_t... I>
+        constexpr T get_backstrides_impl(const S& shape, const T& strides, std::index_sequence<I...>)
+        {
+            return T({(strides[I] * std::ptrdiff_t(shape[I] - 1))...});
+        }
+
+        // Number of elements of a fixed shape: the product of its extents.
+        template <std::size_t... X>
+        struct fixed_compute_size_impl;
+
+        template <std::size_t Y, std::size_t... X>
+        struct fixed_compute_size_impl<Y, X...>
+        {
+            static constexpr std::size_t value = Y * fixed_compute_size_impl<X...>::value;
+        };
+
+        template <std::size_t X>
+        struct fixed_compute_size_impl<X>
+        {
+            static constexpr std::size_t value = X;
+        };
+
+        template <>
+        struct fixed_compute_size_impl<>
+        {
+            // support for 0D xtensor fixed (empty shape = xshape<>)
+            static constexpr std::size_t value = 1;
+        };
+
+        // TODO unify with constexpr compute_size when dropping MSVC 2015
+        template <class T>
+        struct fixed_compute_size;
+
+        template <std::size_t... X>
+        struct fixed_compute_size<xt::fixed_shape<X...>>
+        {
+            static constexpr std::size_t value = fixed_compute_size_impl<X...>::value;
+        };
+
+        // Builds the nested C-array type V[Y0][Y1]... matching a fixed shape,
+        // used for nested initializer-list support; an empty shape yields V[1]
+        // (0D scalar).
+        template <class V, std::size_t... X>
+        struct get_init_type_impl;
+
+        template <class V, std::size_t Y>
+        struct get_init_type_impl<V, Y>
+        {
+            using type = V[Y];
+        };
+
+        template <class V>
+        struct get_init_type_impl<V>
+        {
+            using type = V[1];
+        };
+
+        template <class V, std::size_t Y, std::size_t... X>
+        struct get_init_type_impl<V, Y, X...>
+        {
+            using tmp_type = typename get_init_type_impl<V, X...>::type;
+            using type = tmp_type[Y];
+        };
+    }
+
+    // Computes the strides of a fixed shape at compile time, returned in
+    // container type R; see detail::get_strides_impl for the unit-axis rule.
+    template <layout_type L, class R, std::size_t... X>
+    constexpr R get_strides(const fixed_shape<X...>& shape) noexcept
+    {
+        return detail::get_strides_impl<L, R>(shape, std::make_index_sequence<sizeof...(X)>{});
+    }
+
+    // Computes per-axis backstrides (stride * (extent - 1)) at compile time.
+    template <class S, class T>
+    constexpr T get_backstrides(const S& shape, const T& strides) noexcept
+    {
+        return detail::get_backstrides_impl(shape, strides, std::make_index_sequence<std::tuple_size<T>::value>{});
+    }
+
+    // Nested C-array type used to accept nested initializer lists for a fixed
+    // shape, e.g. get_init_type_t<V, fixed_shape<2, 3>> is V[2][3].
+    template <class V, class S>
+    struct get_init_type;
+
+    template <class V, std::size_t... X>
+    struct get_init_type<V, fixed_shape<X...>>
+    {
+        using type = typename detail::get_init_type_impl<V, X...>::type;
+    };
+
+    template <class V, class S>
+    using get_init_type_t = typename get_init_type<V, S>::type;
+
+    // Inner types of xfixed_container: shape, strides and storage size are all
+    // fixed at compile time by the shape parameter S.
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    struct xcontainer_inner_types<xfixed_container<ET, S, L, SH, Tag>>
+    {
+        using shape_type = S;
+        using inner_shape_type = typename S::cast_type;
+        using strides_type = get_strides_t<inner_shape_type>;
+        using inner_strides_type = strides_type;
+        using backstrides_type = inner_strides_type;
+        using inner_backstrides_type = backstrides_type;
+
+        // NOTE: 0D (S::size() == 0) results in storage for 1 element (scalar)
+#if defined(_MSC_VER) && _MSC_VER < 1910 && !defined(_WIN64)
+        // WORKAROUND FOR MSVC 2015 32 bit, fallback to unaligned container for 0D scalar case
+        using storage_type = std::array<ET, detail::fixed_compute_size<S>::value>;
+#else
+        // Aligned storage sized at compile time as the product of the extents.
+        using storage_type = aligned_array<ET, detail::fixed_compute_size<S>::value>;
+#endif
+
+        using reference = typename storage_type::reference;
+        using const_reference = typename storage_type::const_reference;
+        using size_type = typename storage_type::size_type;
+        using temporary_type = xfixed_container<ET, S, L, SH, Tag>;
+        static constexpr layout_type layout = L;
+    };
+
+    // Iterator-related inner types: inherited unchanged from the generic
+    // container iterable types.
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    struct xiterable_inner_types<xfixed_container<ET, S, L, SH, Tag>>
+        : xcontainer_iterable_types<xfixed_container<ET, S, L, SH, Tag>>
+    {
+    };
+
+    /**
+     * @class xfixed_container
+     * @brief Dense multidimensional container with tensor semantic and fixed
+     * dimension.
+     *
+     * The xfixed_container class implements a dense multidimensional container
+     * with tensor semantic and fixed dimension
+     *
+     * @tparam ET The type of the elements.
+     * @tparam S The xshape template paramter of the container.
+     * @tparam L The layout_type of the tensor.
+     * @tparam SH Wether the tensor can be used as a shared expression.
+     * @tparam Tag The expression tag.
+     * @sa xtensor_fixed
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    class xfixed_container : public xcontainer<xfixed_container<ET, S, L, SH, Tag>>,
+                             public xcontainer_semantic<xfixed_container<ET, S, L, SH, Tag>>
+    {
+    public:
+
+        using self_type = xfixed_container<ET, S, L, SH, Tag>;
+        using base_type = xcontainer<self_type>;
+        using semantic_base = xcontainer_semantic<self_type>;
+
+        using storage_type = typename base_type::storage_type;
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using const_reference = typename base_type::const_reference;
+        using pointer = typename base_type::pointer;
+        using const_pointer = typename base_type::const_pointer;
+        using shape_type = typename base_type::shape_type;
+        using inner_shape_type = typename base_type::inner_shape_type;
+        using strides_type = typename base_type::strides_type;
+        using backstrides_type = typename base_type::backstrides_type;
+        using inner_backstrides_type = typename base_type::inner_backstrides_type;
+        using inner_strides_type = typename base_type::inner_strides_type;
+        using temporary_type = typename semantic_base::temporary_type;
+        using expression_tag = Tag;
+
+        // Number of dimensions, known at compile time from the shape type.
+        static constexpr std::size_t N = std::tuple_size<shape_type>::value;
+        static constexpr std::size_t rank = N;
+
+        xfixed_container() = default;
+        xfixed_container(const value_type& v);
+        // Shape/layout parameters exist for API compatibility with the dynamic
+        // containers: the actual shape is fixed by S at compile time.
+        explicit xfixed_container(const inner_shape_type& shape, layout_type l = L);
+        explicit xfixed_container(const inner_shape_type& shape, value_type v, layout_type l = L);
+
+        // remove this enable_if when removing the other value_type constructor
+        template <class IX = std::integral_constant<std::size_t, N>, class EN = std::enable_if_t<IX::value != 0, int>>
+        xfixed_container(nested_initializer_list_t<value_type, N> t);
+
+        ~xfixed_container() = default;
+
+        xfixed_container(const xfixed_container&) = default;
+        xfixed_container& operator=(const xfixed_container&) = default;
+
+        xfixed_container(xfixed_container&&) = default;
+        xfixed_container& operator=(xfixed_container&&) = default;
+
+        // Extended copy semantic: construction / assignment from any xexpression.
+        template <class E>
+        xfixed_container(const xexpression<E>& e);
+
+        template <class E>
+        xfixed_container& operator=(const xexpression<E>& e);
+
+        template <class ST = std::array<std::size_t, N>>
+        static xfixed_container from_shape(ST&& /*s*/);
+
+        // resize/reshape are const and cannot change the container: the shape
+        // is fixed, so presumably they only validate the requested shape --
+        // see the out-of-line definitions for the exact behavior.
+        template <class ST = std::array<std::size_t, N>>
+        void resize(ST&& shape, bool force = false) const;
+        template <class ST = shape_type>
+        void resize(ST&& shape, layout_type l) const;
+        template <class ST = shape_type>
+        void resize(ST&& shape, const strides_type& strides) const;
+
+        template <class ST = std::array<std::size_t, N>>
+        const auto& reshape(ST&& shape, layout_type layout = L) const;
+
+        template <class ST>
+        bool broadcast_shape(ST& s, bool reuse_cache = false) const;
+
+        constexpr layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+
+    private:
+
+        storage_type m_storage;
+
+        // Shape, strides and backstrides are per-instantiation constants
+        // (defined out of line below when XTENSOR_HAS_CONSTEXPR_ENHANCED).
+        XTENSOR_CONSTEXPR_ENHANCED_STATIC inner_shape_type m_shape = S();
+        XTENSOR_CONSTEXPR_ENHANCED_STATIC inner_strides_type m_strides = get_strides<L, inner_strides_type>(S());
+        XTENSOR_CONSTEXPR_ENHANCED_STATIC inner_backstrides_type
+            m_backstrides = get_backstrides(m_shape, m_strides);
+
+        storage_type& storage_impl() noexcept;
+        const storage_type& storage_impl() const noexcept;
+
+        XTENSOR_CONSTEXPR_RETURN const inner_shape_type& shape_impl() const noexcept;
+        XTENSOR_CONSTEXPR_RETURN const inner_strides_type& strides_impl() const noexcept;
+        XTENSOR_CONSTEXPR_RETURN const inner_backstrides_type& backstrides_impl() const noexcept;
+
+        friend class xcontainer<xfixed_container<ET, S, L, SH, Tag>>;
+    };
+
+#ifdef XTENSOR_HAS_CONSTEXPR_ENHANCED
+    // Out of line definitions to prevent linker errors prior to C++17
+    // (pre-C++17, odr-used static constexpr members still need a definition).
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    constexpr
+        typename xfixed_container<ET, S, L, SH, Tag>::inner_shape_type xfixed_container<ET, S, L, SH, Tag>::m_shape;
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    constexpr
+        typename xfixed_container<ET, S, L, SH, Tag>::inner_strides_type xfixed_container<ET, S, L, SH, Tag>::m_strides;
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    constexpr typename xfixed_container<ET, S, L, SH, Tag>::inner_backstrides_type
+        xfixed_container<ET, S, L, SH, Tag>::m_backstrides;
+#endif
+
+    /****************************************
+     * xfixed_container_adaptor declaration *
+     ****************************************/
+
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    class xfixed_adaptor;
+
+    // Inner types of xfixed_adaptor: storage comes from the adapted container
+    // closure EC, while shape and strides are compile-time fixed as for
+    // xfixed_container.
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    struct xcontainer_inner_types<xfixed_adaptor<EC, S, L, SH, Tag>>
+    {
+        using storage_type = std::remove_reference_t<EC>;
+        using reference = typename storage_type::reference;
+        using const_reference = typename storage_type::const_reference;
+        using size_type = typename storage_type::size_type;
+        using shape_type = S;
+        using inner_shape_type = typename S::cast_type;
+        using strides_type = get_strides_t<inner_shape_type>;
+        using backstrides_type = strides_type;
+        using inner_strides_type = strides_type;
+        using inner_backstrides_type = backstrides_type;
+        using temporary_type = xfixed_container<typename storage_type::value_type, S, L, SH, Tag>;
+        static constexpr layout_type layout = L;
+    };
+
+    // Iterator-related inner types: inherited unchanged from the generic
+    // container iterable types.
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    struct xiterable_inner_types<xfixed_adaptor<EC, S, L, SH, Tag>>
+        : xcontainer_iterable_types<xfixed_adaptor<EC, S, L, SH, Tag>>
+    {
+    };
+
+    /**
+     * @class xfixed_adaptor
+     * @brief Dense multidimensional container adaptor with tensor semantic
+     * and fixed dimension.
+     *
+     * The xfixed_adaptor class implements a dense multidimensional
+     * container adaptor with tensor semantic and fixed dimension. It
+     * is used to provide a multidimensional container semantic and a
+     * tensor semantic to stl-like containers.
+     *
+     * @tparam EC The closure for the container type to adapt.
+     * @tparam S The xshape template parameter for the fixed shape of the adaptor
+     * @tparam L The layout_type of the adaptor.
+     * @tparam SH Wether the adaptor can be used as a shared expression.
+     * @tparam Tag The expression tag.
+     */
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    class xfixed_adaptor : public xcontainer<xfixed_adaptor<EC, S, L, SH, Tag>>,
+                           public xcontainer_semantic<xfixed_adaptor<EC, S, L, SH, Tag>>
+    {
+    public:
+
+        using container_closure_type = EC;
+
+        using self_type = xfixed_adaptor<EC, S, L, SH, Tag>;
+        using base_type = xcontainer<self_type>;
+        using semantic_base = xcontainer_semantic<self_type>;
+        using storage_type = typename base_type::storage_type;
+        using shape_type = typename base_type::shape_type;
+        using strides_type = typename base_type::strides_type;
+        using backstrides_type = typename base_type::backstrides_type;
+        using inner_shape_type = typename base_type::inner_shape_type;
+        using inner_strides_type = typename base_type::inner_strides_type;
+        using inner_backstrides_type = typename base_type::inner_backstrides_type;
+        using temporary_type = typename semantic_base::temporary_type;
+        using expression_tag = Tag;
+
+        static constexpr std::size_t N = S::size();
+
+        xfixed_adaptor(storage_type&& data);
+        xfixed_adaptor(const storage_type& data);
+
+        template <class D>
+        xfixed_adaptor(D&& data);
+
+        ~xfixed_adaptor() = default;
+
+        xfixed_adaptor(const xfixed_adaptor&) = default;
+        xfixed_adaptor& operator=(const xfixed_adaptor&);
+
+        xfixed_adaptor(xfixed_adaptor&&) = default;
+        xfixed_adaptor& operator=(xfixed_adaptor&&);
+        xfixed_adaptor& operator=(temporary_type&&);
+
+        template <class E>
+        xfixed_adaptor& operator=(const xexpression<E>& e);
+
+        template <class ST = std::array<std::size_t, N>>
+        void resize(ST&& shape, bool force = false) const;
+        template <class ST = shape_type>
+        void resize(ST&& shape, layout_type l) const;
+        template <class ST = shape_type>
+        void resize(ST&& shape, const strides_type& strides) const;
+
+        template <class ST = std::array<std::size_t, N>>
+        const auto& reshape(ST&& shape, layout_type layout = L) const;
+
+        template <class ST>
+        bool broadcast_shape(ST& s, bool reuse_cache = false) const;
+
+        constexpr layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+
+    private:
+
+        container_closure_type m_storage;
+
+        XTENSOR_CONSTEXPR_ENHANCED_STATIC inner_shape_type m_shape = S();
+        XTENSOR_CONSTEXPR_ENHANCED_STATIC inner_strides_type m_strides = get_strides<L, inner_strides_type>(S());
+        XTENSOR_CONSTEXPR_ENHANCED_STATIC inner_backstrides_type
+            m_backstrides = get_backstrides(m_shape, m_strides);
+
+        storage_type& storage_impl() noexcept;
+        const storage_type& storage_impl() const noexcept;
+
+        XTENSOR_CONSTEXPR_RETURN const inner_shape_type& shape_impl() const noexcept;
+        XTENSOR_CONSTEXPR_RETURN const inner_strides_type& strides_impl() const noexcept;
+        XTENSOR_CONSTEXPR_RETURN const inner_backstrides_type& backstrides_impl() const noexcept;
+
+        friend class xcontainer<xfixed_adaptor<EC, S, L, SH, Tag>>;
+    };
+
+#ifdef XTENSOR_HAS_CONSTEXPR_ENHANCED
+    // Out of line definitions to prevent linker errors prior to C++17
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    constexpr
+        typename xfixed_adaptor<EC, S, L, SH, Tag>::inner_shape_type xfixed_adaptor<EC, S, L, SH, Tag>::m_shape;
+
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    constexpr
+        typename xfixed_adaptor<EC, S, L, SH, Tag>::inner_strides_type xfixed_adaptor<EC, S, L, SH, Tag>::m_strides;
+
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    constexpr typename xfixed_adaptor<EC, S, L, SH, Tag>::inner_backstrides_type
+        xfixed_adaptor<EC, S, L, SH, Tag>::m_backstrides;
+#endif
+
+    /************************************
+     * xfixed_container implementation *
+     ************************************/
+
+    /**
+     * @name Constructors
+     */
+    //@{
+
+    /**
+     * Create an uninitialized xfixed_container.
+     * Note this function is only provided for homogeneity, and the shape & layout argument is
+     * disregarded (the template shape is always used).
+     *
+     * @param shape the shape of the xfixed_container (unused!)
+     * @param l the layout_type of the xfixed_container (unused!)
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    inline xfixed_container<ET, S, L, SH, Tag>::xfixed_container(const inner_shape_type& shape, layout_type l)
+    {
+        (void) (shape);
+        (void) (l);
+        XTENSOR_ASSERT(shape.size() == N && std::equal(shape.begin(), shape.end(), m_shape.begin()));
+        XTENSOR_ASSERT(L == l);
+    }
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    inline xfixed_container<ET, S, L, SH, Tag>::xfixed_container(const value_type& v)
+    {
+        if (this->size() != 1)
+        {
+            XTENSOR_THROW(std::runtime_error, "wrong shape for scalar assignment (has to be xshape<>).");
+        }
+        m_storage[0] = v;
+    }
+
+    /**
+     * Create an xfixed_container, and initialize with the value of v.
+     * Note, the shape argument to this function is only provided for homogeneity,
+     * and the shape argument is disregarded (the template shape is always used).
+     *
+     * @param shape the shape of the xfixed_container (unused!)
+     * @param v the fill value
+     * @param l the layout_type of the xfixed_container (unused!)
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    inline xfixed_container<ET, S, L, SH, Tag>::xfixed_container(
+        const inner_shape_type& shape,
+        value_type v,
+        layout_type l
+    )
+    {
+        (void) (shape);
+        (void) (l);
+        XTENSOR_ASSERT(shape.size() == N && std::equal(shape.begin(), shape.end(), m_shape.begin()));
+        XTENSOR_ASSERT(L == l);
+        std::fill(m_storage.begin(), m_storage.end(), v);
+    }
+
+    namespace detail
+    {
+        // Recursively checks that a nested initializer list matches the
+        // expected shape: at recursion level X the extent
+        // shape[shape.size() - X] is compared against the list size, then
+        // each sub-list is checked one level deeper.
+        template <std::size_t X>
+        struct check_initializer_list_shape
+        {
+            template <class T, class S>
+            static bool run(const T& t, const S& shape)
+            {
+                std::size_t IX = shape.size() - X;
+                bool result = (shape[IX] == t.size());
+                for (std::size_t i = 0; i < shape[IX]; ++i)
+                {
+                    result = result && check_initializer_list_shape<X - 1>::run(t.begin()[i], shape);
+                }
+                return result;
+            }
+        };
+
+        // Recursion terminator: scalar level, nothing left to check.
+        template <>
+        struct check_initializer_list_shape<0>
+        {
+            template <class T, class S>
+            static bool run(const T& /*t*/, const S& /*shape*/)
+            {
+                return true;
+            }
+        };
+    }
+
+    /**
+     * Create an uninitialized xfixed_container. Provided for homogeneity
+     * with the other container types: the requested shape must match the
+     * fixed shape S (checked with a debug assertion only).
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline xfixed_container<ET, S, L, SH, Tag> xfixed_container<ET, S, L, SH, Tag>::from_shape(ST&& shape)
+    {
+        (void) shape;
+        self_type tmp;
+        XTENSOR_ASSERT(shape.size() == N && std::equal(shape.begin(), shape.end(), tmp.shape().begin()));
+        return tmp;
+    }
+
+    /**
+     * Allocates an xfixed_container with shape S with values from a C array.
+     * The type returned by get_init_type_t is raw C array ``value_type[X][Y][Z]`` for
+     * ``xt::xshape<X, Y, Z>``. C arrays can be initialized with the initializer list syntax,
+     * but the size is checked at compile time to prevent errors.
+     * Note: for clang < 3.8 this is an initializer_list and the size is not checked at compile-or runtime.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class IX, class EN>
+    inline xfixed_container<ET, S, L, SH, Tag>::xfixed_container(nested_initializer_list_t<value_type, N> t)
+    {
+        XTENSOR_ASSERT_MSG(
+            detail::check_initializer_list_shape<N>::run(t, this->shape()) == true,
+            "initializer list shape does not match fixed shape"
+        );
+        constexpr auto tmp = layout_type::row_major;
+        L == tmp ? nested_copy(m_storage.begin(), t) : nested_copy(this->template begin<tmp>(), t);
+    }
+
+    //@}
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended copy constructor.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class E>
+    inline xfixed_container<ET, S, L, SH, Tag>::xfixed_container(const xexpression<E>& e)
+    {
+        semantic_base::assign(e);
+    }
+
+    /**
+     * The extended assignment operator.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class E>
+    inline auto xfixed_container<ET, S, L, SH, Tag>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        return semantic_base::operator=(e);
+    }
+
+    //@}
+
+    /**
+     * Note that the xfixed_container **cannot** be resized. Attempting to resize with a different
+     * size throws an assert in debug mode.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline void xfixed_container<ET, S, L, SH, Tag>::resize(ST&& shape, bool) const
+    {
+        (void) (shape);  // remove unused parameter warning if XTENSOR_ASSERT undefined
+        XTENSOR_ASSERT(std::equal(shape.begin(), shape.end(), m_shape.begin()) && shape.size() == m_shape.size());
+    }
+
+    /**
+     * Note that the xfixed_container **cannot** be resized. Attempting to resize with a different
+     * size throws an assert in debug mode.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline void xfixed_container<ET, S, L, SH, Tag>::resize(ST&& shape, layout_type l) const
+    {
+        (void) (shape);  // remove unused parameter warning if XTENSOR_ASSERT undefined
+        (void) (l);
+        XTENSOR_ASSERT(
+            std::equal(shape.begin(), shape.end(), m_shape.begin()) && shape.size() == m_shape.size() && L == l
+        );
+    }
+
+    /**
+     * Note that the xfixed_container **cannot** be resized. Attempting to resize with a different
+     * size throws an assert in debug mode.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline void xfixed_container<ET, S, L, SH, Tag>::resize(ST&& shape, const strides_type& strides) const
+    {
+        (void) (shape);  // remove unused parameter warning if XTENSOR_ASSERT undefined
+        (void) (strides);
+        XTENSOR_ASSERT(std::equal(shape.begin(), shape.end(), m_shape.begin()) && shape.size() == m_shape.size());
+        XTENSOR_ASSERT(
+            std::equal(strides.begin(), strides.end(), m_strides.begin()) && strides.size() == m_strides.size()
+        );
+    }
+
+    /**
+     * Note that the xfixed_container **cannot** be reshaped to a shape different from ``S``.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline const auto& xfixed_container<ET, S, L, SH, Tag>::reshape(ST&& shape, layout_type layout) const
+    {
+        if (!(std::equal(shape.begin(), shape.end(), m_shape.begin()) && shape.size() == m_shape.size()
+              && layout == L))
+        {
+            XTENSOR_THROW(std::runtime_error, "Trying to reshape xtensor_fixed with different shape or layout.");
+        }
+        return *this;
+    }
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline bool xfixed_container<ET, S, L, SH, Tag>::broadcast_shape(ST& shape, bool) const
+    {
+        return xt::broadcast_shape(m_shape, shape);
+    }
+
+    // The layout of a fixed container is a compile-time property; return the
+    // static layout computed by the base class.
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    constexpr layout_type xfixed_container<ET, S, L, SH, Tag>::layout() const noexcept
+    {
+        return base_type::static_layout;
+    }
+
+    // A fixed container is contiguous when it has no strides (e.g. 0-d) or
+    // when the innermost stride of the static layout equals 1.
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    inline bool xfixed_container<ET, S, L, SH, Tag>::is_contiguous() const noexcept
+    {
+        using str_type = typename inner_strides_type::value_type;
+        return m_strides.empty() || (layout() == layout_type::row_major && m_strides.back() == str_type(1))
+               || (layout() == layout_type::column_major && m_strides.front() == str_type(1));
+    }
+
+    // Private accessors used by the xcontainer CRTP base: expose the storage
+    // and the (compile-time) shape, strides and backstrides.
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    inline auto xfixed_container<ET, S, L, SH, Tag>::storage_impl() noexcept -> storage_type&
+    {
+        return m_storage;
+    }
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    inline auto xfixed_container<ET, S, L, SH, Tag>::storage_impl() const noexcept -> const storage_type&
+    {
+        return m_storage;
+    }
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    XTENSOR_CONSTEXPR_RETURN auto xfixed_container<ET, S, L, SH, Tag>::shape_impl() const noexcept
+        -> const inner_shape_type&
+    {
+        return m_shape;
+    }
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    XTENSOR_CONSTEXPR_RETURN auto xfixed_container<ET, S, L, SH, Tag>::strides_impl() const noexcept
+        -> const inner_strides_type&
+    {
+        return m_strides;
+    }
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    XTENSOR_CONSTEXPR_RETURN auto xfixed_container<ET, S, L, SH, Tag>::backstrides_impl() const noexcept
+        -> const inner_backstrides_type&
+    {
+        return m_backstrides;
+    }
+
+    /*******************
+     * xfixed_adaptor *
+     *******************/
+
+    /**
+     * @name Constructors
+     */
+    //@{
+    /**
+     * Constructs an xfixed_adaptor of the given stl-like container.
+     * @param data the container to adapt
+     */
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    inline xfixed_adaptor<EC, S, L, SH, Tag>::xfixed_adaptor(storage_type&& data)
+        : base_type()
+        , m_storage(std::move(data))
+    {
+    }
+
+    /**
+     * Constructs an xfixed_adaptor of the given stl-like container.
+     * @param data the container to adapt
+     */
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    inline xfixed_adaptor<EC, S, L, SH, Tag>::xfixed_adaptor(const storage_type& data)
+        : base_type()
+        , m_storage(data)
+    {
+    }
+
+    /**
+     * Constructs an xfixed_adaptor of the given stl-like container,
+     * with the specified shape and layout_type.
+     * @param data the container to adapt
+     */
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    template <class D>
+    inline xfixed_adaptor<EC, S, L, SH, Tag>::xfixed_adaptor(D&& data)
+        : base_type()
+        , m_storage(std::forward<D>(data))
+    {
+    }
+
+    //@}
+
+    // Copy assignment: delegate to the base, then copy the adapted storage.
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    inline auto xfixed_adaptor<EC, S, L, SH, Tag>::operator=(const xfixed_adaptor& rhs) -> self_type&
+    {
+        base_type::operator=(rhs);
+        m_storage = rhs.m_storage;
+        return *this;
+    }
+
+    // Move assignment. NOTE(review): the storage is copied rather than
+    // moved; EC may be a reference closure, for which a copy is the only
+    // valid operation — confirm this matches the upstream intent for
+    // value closures.
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    inline auto xfixed_adaptor<EC, S, L, SH, Tag>::operator=(xfixed_adaptor&& rhs) -> self_type&
+    {
+        base_type::operator=(std::move(rhs));
+        m_storage = rhs.m_storage;
+        return *this;
+    }
+
+    // Assignment from a temporary container produced by expression
+    // evaluation: resize the adapted storage and copy its elements over.
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    inline auto xfixed_adaptor<EC, S, L, SH, Tag>::operator=(temporary_type&& rhs) -> self_type&
+    {
+        m_storage.resize(rhs.storage().size());
+        std::copy(rhs.storage().cbegin(), rhs.storage().cend(), m_storage.begin());
+        return *this;
+    }
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended assignment operator.
+     */
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    template <class E>
+    inline auto xfixed_adaptor<EC, S, L, SH, Tag>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        return semantic_base::operator=(e);
+    }
+
+    //@}
+
+    /**
+     * Note that the xfixed_adaptor **cannot** be resized. Attempting to resize with a different
+     * size throws an assert in debug mode.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline void xfixed_adaptor<ET, S, L, SH, Tag>::resize(ST&& shape, bool) const
+    {
+        (void) (shape);  // remove unused parameter warning if XTENSOR_ASSERT undefined
+        XTENSOR_ASSERT(std::equal(shape.begin(), shape.end(), m_shape.begin()) && shape.size() == m_shape.size());
+    }
+
+    /**
+     * Note that the xfixed_adaptor **cannot** be resized. Attempting to resize with a different
+     * size throws an assert in debug mode.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline void xfixed_adaptor<ET, S, L, SH, Tag>::resize(ST&& shape, layout_type l) const
+    {
+        (void) (shape);  // remove unused parameter warning if XTENSOR_ASSERT undefined
+        (void) (l);
+        XTENSOR_ASSERT(
+            std::equal(shape.begin(), shape.end(), m_shape.begin()) && shape.size() == m_shape.size() && L == l
+        );
+    }
+
+    /**
+     * Note that the xfixed_adaptor **cannot** be resized. Attempting to resize with a different
+     * size throws an assert in debug mode.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline void xfixed_adaptor<ET, S, L, SH, Tag>::resize(ST&& shape, const strides_type& strides) const
+    {
+        (void) (shape);  // remove unused parameter warning if XTENSOR_ASSERT undefined
+        (void) (strides);
+        XTENSOR_ASSERT(std::equal(shape.begin(), shape.end(), m_shape.begin()) && shape.size() == m_shape.size());
+        XTENSOR_ASSERT(
+            std::equal(strides.begin(), strides.end(), m_strides.begin()) && strides.size() == m_strides.size()
+        );
+    }
+
+    /**
+     * Note that the xfixed_container **cannot** be reshaped to a shape different from ``S``.
+     */
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline const auto& xfixed_adaptor<ET, S, L, SH, Tag>::reshape(ST&& shape, layout_type layout) const
+    {
+        if (!(std::equal(shape.begin(), shape.end(), m_shape.begin()) && shape.size() == m_shape.size()
+              && layout == L))
+        {
+            XTENSOR_THROW(std::runtime_error, "Trying to reshape xtensor_fixed with different shape or layout.");
+        }
+        return *this;
+    }
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    template <class ST>
+    inline bool xfixed_adaptor<ET, S, L, SH, Tag>::broadcast_shape(ST& shape, bool) const
+    {
+        return xt::broadcast_shape(m_shape, shape);
+    }
+
+    // Private accessors used by the xcontainer CRTP base: expose the adapted
+    // storage and the (compile-time) shape, strides and backstrides.
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    inline auto xfixed_adaptor<EC, S, L, SH, Tag>::storage_impl() noexcept -> storage_type&
+    {
+        return m_storage;
+    }
+
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    inline auto xfixed_adaptor<EC, S, L, SH, Tag>::storage_impl() const noexcept -> const storage_type&
+    {
+        return m_storage;
+    }
+
+    // The layout of a fixed adaptor is a compile-time property; return the
+    // static layout computed by the base class.
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    constexpr layout_type xfixed_adaptor<EC, S, L, SH, Tag>::layout() const noexcept
+    {
+        return base_type::static_layout;
+    }
+
+    // Contiguous when there are no strides (e.g. 0-d) or when the innermost
+    // stride of the static layout equals 1.
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    inline bool xfixed_adaptor<EC, S, L, SH, Tag>::is_contiguous() const noexcept
+    {
+        using str_type = typename inner_strides_type::value_type;
+        return m_strides.empty() || (layout() == layout_type::row_major && m_strides.back() == str_type(1))
+               || (layout() == layout_type::column_major && m_strides.front() == str_type(1));
+    }
+
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    XTENSOR_CONSTEXPR_RETURN auto xfixed_adaptor<EC, S, L, SH, Tag>::shape_impl() const noexcept
+        -> const inner_shape_type&
+    {
+        return m_shape;
+    }
+
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    XTENSOR_CONSTEXPR_RETURN auto xfixed_adaptor<EC, S, L, SH, Tag>::strides_impl() const noexcept
+        -> const inner_strides_type&
+    {
+        return m_strides;
+    }
+
+    template <class EC, class S, layout_type L, bool SH, class Tag>
+    XTENSOR_CONSTEXPR_RETURN auto xfixed_adaptor<EC, S, L, SH, Tag>::backstrides_impl() const noexcept
+        -> const inner_backstrides_type&
+    {
+        return m_backstrides;
+    }
+}
+
+#endif

+ 1193 - 0
3rd/numpy/include/xtensor/xfunction.hpp

@@ -0,0 +1,1193 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_FUNCTION_HPP
+#define XTENSOR_FUNCTION_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <numeric>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xsequence.hpp>
+#include <xtl/xtype_traits.hpp>
+
+#include "xaccessible.hpp"
+#include "xexpression_traits.hpp"
+#include "xiterable.hpp"
+#include "xiterator.hpp"
+#include "xlayout.hpp"
+#include "xscalar.hpp"
+#include "xshape.hpp"
+#include "xstrides.hpp"
+#include "xtensor_simd.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+    namespace detail
+    {
+
+        template <bool... B>
+        using conjunction_c = xtl::conjunction<std::integral_constant<bool, B>...>;
+
+        /************************
+         * xfunction_cache_impl *
+         ************************/
+
+        template <class S, class is_shape_trivial>
+        struct xfunction_cache_impl
+        {
+            S shape;
+            bool is_trivial;
+            bool is_initialized;
+
+            xfunction_cache_impl()
+                : shape(xtl::make_sequence<S>(0, std::size_t(0)))
+                , is_trivial(false)
+                , is_initialized(false)
+            {
+            }
+        };
+
+        template <std::size_t... N, class is_shape_trivial>
+        struct xfunction_cache_impl<fixed_shape<N...>, is_shape_trivial>
+        {
+            XTENSOR_CONSTEXPR_ENHANCED_STATIC fixed_shape<N...> shape = fixed_shape<N...>();
+            XTENSOR_CONSTEXPR_ENHANCED_STATIC bool is_trivial = is_shape_trivial::value;
+            XTENSOR_CONSTEXPR_ENHANCED_STATIC bool is_initialized = true;
+        };
+
+#ifdef XTENSOR_HAS_CONSTEXPR_ENHANCED
+        // Out of line definitions to prevent linker errors prior to C++17
+        template <std::size_t... N, class is_shape_trivial>
+        constexpr fixed_shape<N...> xfunction_cache_impl<fixed_shape<N...>, is_shape_trivial>::shape;
+
+        template <std::size_t... N, class is_shape_trivial>
+        constexpr bool xfunction_cache_impl<fixed_shape<N...>, is_shape_trivial>::is_trivial;
+
+        template <std::size_t... N, class is_shape_trivial>
+        constexpr bool xfunction_cache_impl<fixed_shape<N...>, is_shape_trivial>::is_initialized;
+#endif
+
+        template <class... CT>
+        struct xfunction_bool_load_type
+        {
+            using type = xtl::promote_type_t<typename std::decay_t<CT>::bool_load_type...>;
+        };
+
+        template <class CT>
+        struct xfunction_bool_load_type<CT>
+        {
+            using type = typename std::decay_t<CT>::bool_load_type;
+        };
+
+        template <class... CT>
+        using xfunction_bool_load_type_t = typename xfunction_bool_load_type<CT...>::type;
+    }
+
+    /************************
+     * xfunction extensions *
+     ************************/
+
+    namespace extension
+    {
+
+        template <class Tag, class F, class... CT>
+        struct xfunction_base_impl;
+
+        template <class F, class... CT>
+        struct xfunction_base_impl<xtensor_expression_tag, F, CT...>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class F, class... CT>
+        struct xfunction_base : xfunction_base_impl<xexpression_tag_t<CT...>, F, CT...>
+        {
+        };
+
+        template <class F, class... CT>
+        using xfunction_base_t = typename xfunction_base<F, CT...>::type;
+    }
+
+    template <class promote>
+    struct xfunction_cache : detail::xfunction_cache_impl<typename promote::type, promote>
+    {
+    };
+
+    // Forward declarations.
+    template <class F, class... CT>
+    class xfunction_iterator;
+
+    template <class F, class... CT>
+    class xfunction_stepper;
+
+    template <class F, class... CT>
+    class xfunction;
+
+    // The shape of an xfunction is the promotion (broadcast) of its
+    // arguments' shapes; stepping over an xfunction is always const.
+    template <class F, class... CT>
+    struct xiterable_inner_types<xfunction<F, CT...>>
+    {
+        using inner_shape_type = promote_shape_t<typename std::decay_t<CT>::shape_type...>;
+        using const_stepper = xfunction_stepper<F, CT...>;
+        using stepper = const_stepper;
+    };
+
+    // value_type / reference are deduced from the functor's return type when
+    // applied to the arguments' value types.
+    template <class F, class... CT>
+    struct xcontainer_inner_types<xfunction<F, CT...>>
+    {
+        // Added indirection for MSVC 2017 bug with the operator value_type()
+        using func_return_type = typename meta_identity<
+            decltype(std::declval<F>()(std::declval<xvalue_type_t<std::decay_t<CT>>>()...))>::type;
+        using value_type = std::decay_t<func_return_type>;
+        using reference = func_return_type;
+        using const_reference = reference;
+        using size_type = common_size_type_t<std::decay_t<CT>...>;
+    };
+
+    // An xfunction offers a SIMD interface for T only if T has a simd type,
+    // the functor provides a simd apply, and every argument supports SIMD.
+    template <class T, class F, class... CT>
+    struct has_simd_interface<xfunction<F, CT...>, T> : xtl::conjunction<
+                                                            has_simd_type<T>,
+                                                            has_simd_apply<F, xt_simd::simd_type<T>>,
+                                                            has_simd_interface<std::decay_t<CT>, T>...>
+    {
+    };
+
+    /*************************************
+     * overlapping_memory_checker_traits *
+     *************************************/
+
+    template <class E>
+    struct overlapping_memory_checker_traits<
+        E,
+        std::enable_if_t<!has_memory_address<E>::value && is_specialization_of<xfunction, E>::value>>
+    {
+        template <std::size_t I = 0, class... T, std::enable_if_t<(I == sizeof...(T)), int> = 0>
+        static bool check_tuple(const std::tuple<T...>&, const memory_range&)
+        {
+            return false;
+        }
+
+        template <std::size_t I = 0, class... T, std::enable_if_t<(I < sizeof...(T)), int> = 0>
+        static bool check_tuple(const std::tuple<T...>& t, const memory_range& dst_range)
+        {
+            using ChildE = std::decay_t<decltype(std::get<I>(t))>;
+            return overlapping_memory_checker_traits<ChildE>::check_overlap(std::get<I>(t), dst_range)
+                   || check_tuple<I + 1>(t, dst_range);
+        }
+
+        static bool check_overlap(const E& expr, const memory_range& dst_range)
+        {
+            if (expr.size() == 0)
+            {
+                return false;
+            }
+            else
+            {
+                return check_tuple(expr.arguments(), dst_range);
+            }
+        }
+    };
+
+    /*************
+     * xfunction *
+     *************/
+
+    /**
+     * @class xfunction
+     * @brief Multidimensional function operating on
+     * xtensor expressions.
+     *
+     * The xfunction class implements a multidimensional function
+     * operating on xtensor expressions.
+     *
+     * @tparam F the function type
+     * @tparam CT the closure types for arguments of the function
+     */
+    template <class F, class... CT>
+    class xfunction : private xconst_iterable<xfunction<F, CT...>>,
+                      public xsharable_expression<xfunction<F, CT...>>,
+                      private xconst_accessible<xfunction<F, CT...>>,
+                      public extension::xfunction_base_t<F, CT...>
+    {
+    public:
+
+        using self_type = xfunction<F, CT...>;
+        using accessible_base = xconst_accessible<self_type>;
+        using extension_base = extension::xfunction_base_t<F, CT...>;
+        using expression_tag = typename extension_base::expression_tag;
+        using only_scalar = all_xscalar<CT...>;
+        using functor_type = typename std::remove_reference<F>::type;
+        using tuple_type = std::tuple<CT...>;
+
+        using inner_types = xcontainer_inner_types<self_type>;
+        using value_type = typename inner_types::value_type;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+        using pointer = value_type*;
+        using const_pointer = const value_type*;
+        using size_type = typename inner_types::size_type;
+        using difference_type = common_difference_type_t<std::decay_t<CT>...>;
+
+        using simd_value_type = xt_simd::simd_type<value_type>;
+
+        // xtl::promote_type_t<typename std::decay_t<CT>::bool_load_type...>;
+        using bool_load_type = detail::xfunction_bool_load_type_t<CT...>;
+
+        template <class requested_type>
+        using simd_return_type = xt_simd::simd_return_type<value_type, requested_type>;
+
+        using iterable_base = xconst_iterable<xfunction<F, CT...>>;
+        using inner_shape_type = typename iterable_base::inner_shape_type;
+        using shape_type = inner_shape_type;
+
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+
+        static constexpr layout_type static_layout = compute_layout(std::decay_t<CT>::static_layout...);
+        static constexpr bool contiguous_layout = static_layout != layout_type::dynamic;
+
+        template <layout_type L>
+        using layout_iterator = typename iterable_base::template layout_iterator<L>;
+        template <layout_type L>
+        using const_layout_iterator = typename iterable_base::template const_layout_iterator<L>;
+        template <layout_type L>
+        using reverse_layout_iterator = typename iterable_base::template reverse_layout_iterator<L>;
+        template <layout_type L>
+        using const_reverse_layout_iterator = typename iterable_base::template const_reverse_layout_iterator<L>;
+
+        template <class S, layout_type L>
+        using broadcast_iterator = typename iterable_base::template broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using const_broadcast_iterator = typename iterable_base::template const_broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using reverse_broadcast_iterator = typename iterable_base::template reverse_broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using const_reverse_broadcast_iterator = typename iterable_base::template const_reverse_broadcast_iterator<S, L>;
+
+        using const_linear_iterator = xfunction_iterator<F, CT...>;
+        using linear_iterator = const_linear_iterator;
+        using const_reverse_linear_iterator = std::reverse_iterator<const_linear_iterator>;
+        using reverse_linear_iterator = std::reverse_iterator<linear_iterator>;
+
+        using iterator = typename iterable_base::iterator;
+        using const_iterator = typename iterable_base::const_iterator;
+        using reverse_iterator = typename iterable_base::reverse_iterator;
+        using const_reverse_iterator = typename iterable_base::const_reverse_iterator;
+
+        template <class Func, class... CTA, class U = std::enable_if_t<!std::is_base_of<std::decay_t<Func>, self_type>::value>>
+        xfunction(Func&& f, CTA&&... e) noexcept;
+
+        template <class FA, class... CTA>
+        xfunction(xfunction<FA, CTA...> xf) noexcept;
+
+        ~xfunction() = default;
+
+        xfunction(const xfunction&) = default;
+        xfunction& operator=(const xfunction&) = default;
+
+        xfunction(xfunction&&) = default;
+        xfunction& operator=(xfunction&&) = default;
+
+        using accessible_base::size;
+        size_type dimension() const noexcept;
+        const inner_shape_type& shape() const;
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+        using accessible_base::shape;
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+
+        using accessible_base::at;
+        using accessible_base::operator[];
+        using accessible_base::back;
+        using accessible_base::front;
+        using accessible_base::in_bounds;
+        using accessible_base::periodic;
+
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        template <class S>
+        bool broadcast_shape(S& shape, bool reuse_cache = false) const;
+
+        template <class S>
+        bool has_linear_assign(const S& strides) const noexcept;
+
+        using iterable_base::begin;
+        using iterable_base::cbegin;
+        using iterable_base::cend;
+        using iterable_base::crbegin;
+        using iterable_base::crend;
+        using iterable_base::end;
+        using iterable_base::rbegin;
+        using iterable_base::rend;
+
+        const_linear_iterator linear_begin() const noexcept;
+        const_linear_iterator linear_end() const noexcept;
+        const_linear_iterator linear_cbegin() const noexcept;
+        const_linear_iterator linear_cend() const noexcept;
+
+        const_reverse_linear_iterator linear_rbegin() const noexcept;
+        const_reverse_linear_iterator linear_rend() const noexcept;
+        const_reverse_linear_iterator linear_crbegin() const noexcept;
+        const_reverse_linear_iterator linear_crend() const noexcept;
+
+        template <class S>
+        const_stepper stepper_begin(const S& shape) const noexcept;
+        template <class S>
+        const_stepper stepper_end(const S& shape, layout_type l) const noexcept;
+
+        const_reference data_element(size_type i) const;
+
+        const_reference flat(size_type i) const;
+
+        template <class UT = self_type, class = typename std::enable_if<UT::only_scalar::value>::type>
+        operator value_type() const;
+
+        template <class align, class requested_type = value_type, std::size_t N = xt_simd::simd_traits<requested_type>::size>
+        simd_return_type<requested_type> load_simd(size_type i) const;
+
+        const tuple_type& arguments() const noexcept;
+
+        const functor_type& functor() const noexcept;
+
+    private:
+
+        template <std::size_t... I>
+        layout_type layout_impl(std::index_sequence<I...>) const noexcept;
+
+        template <std::size_t... I, class... Args>
+        const_reference access_impl(std::index_sequence<I...>, Args... args) const;
+
+        template <std::size_t... I, class... Args>
+        const_reference unchecked_impl(std::index_sequence<I...>, Args... args) const;
+
+        template <std::size_t... I, class It>
+        const_reference element_access_impl(std::index_sequence<I...>, It first, It last) const;
+
+        template <std::size_t... I>
+        const_reference data_element_impl(std::index_sequence<I...>, size_type i) const;
+
+        template <class align, class requested_type, std::size_t N, std::size_t... I>
+        auto load_simd_impl(std::index_sequence<I...>, size_type i) const;
+
+        template <class Func, std::size_t... I>
+        const_stepper build_stepper(Func&& f, std::index_sequence<I...>) const noexcept;
+
+        template <class Func, std::size_t... I>
+        auto build_iterator(Func&& f, std::index_sequence<I...>) const noexcept;
+
+        size_type compute_dimension() const noexcept;
+
+        void compute_cached_shape() const;
+
+        tuple_type m_e;
+        functor_type m_f;
+        mutable xfunction_cache<detail::promote_index<typename std::decay_t<CT>::shape_type...>> m_cache;
+
+        friend class xfunction_iterator<F, CT...>;
+        friend class xfunction_stepper<F, CT...>;
+        friend class xconst_iterable<self_type>;
+        friend class xconst_accessible<self_type>;
+    };
+
+    /**********************
+     * xfunction_iterator *
+     **********************/
+
+    // Random access iterator over an xfunction expression. It aggregates one
+    // linear iterator per operand and applies the functor on dereference;
+    // consequently dereferencing yields a computed value, not a reference
+    // into any underlying storage.
+    template <class F, class... CT>
+    class xfunction_iterator : public xtl::xrandom_access_iterator_base<
+                                   xfunction_iterator<F, CT...>,
+                                   typename xfunction<F, CT...>::value_type,
+                                   typename xfunction<F, CT...>::difference_type,
+                                   typename xfunction<F, CT...>::pointer,
+                                   typename xfunction<F, CT...>::reference>
+    {
+    public:
+
+        using self_type = xfunction_iterator<F, CT...>;
+        using functor_type = typename std::remove_reference<F>::type;
+        using xfunction_type = xfunction<F, CT...>;
+
+        using value_type = typename xfunction_type::value_type;
+        // reference is deliberately a value type: operator* computes f(*it...).
+        using reference = typename xfunction_type::value_type;
+        using pointer = typename xfunction_type::const_pointer;
+        using difference_type = typename xfunction_type::difference_type;
+        using iterator_category = std::random_access_iterator_tag;
+
+        template <class... It>
+        xfunction_iterator(const xfunction_type* func, It&&... it) noexcept;
+
+        self_type& operator++();
+        self_type& operator--();
+
+        self_type& operator+=(difference_type n);
+        self_type& operator-=(difference_type n);
+
+        difference_type operator-(const self_type& rhs) const;
+
+        reference operator*() const;
+
+        bool equal(const self_type& rhs) const;
+        bool less_than(const self_type& rhs) const;
+
+    private:
+
+        // One linear iterator per operand expression.
+        using data_type = std::tuple<decltype(xt::linear_begin(std::declval<const std::decay_t<CT>>()))...>;
+
+        template <std::size_t... I>
+        reference deref_impl(std::index_sequence<I...>) const;
+
+        template <std::size_t... I>
+        difference_type
+        tuple_max_diff(std::index_sequence<I...>, const data_type& lhs, const data_type& rhs) const;
+
+        // Non-owning pointer back to the xfunction (provides the functor).
+        const xfunction_type* p_f;
+        data_type m_it;
+    };
+
+    // Comparison operators; implemented in terms of equal() / less_than(),
+    // which compare a single representative sub-iterator (see below).
+    template <class F, class... CT>
+    bool operator==(const xfunction_iterator<F, CT...>& it1, const xfunction_iterator<F, CT...>& it2);
+
+    template <class F, class... CT>
+    bool operator<(const xfunction_iterator<F, CT...>& it1, const xfunction_iterator<F, CT...>& it2);
+
+    /*********************
+     * xfunction_stepper *
+     *********************/
+
+    // Multidimensional stepper over an xfunction expression. It aggregates
+    // one const stepper per operand; every stepping operation is broadcast
+    // to all sub-steppers, and dereferencing applies the functor.
+    template <class F, class... CT>
+    class xfunction_stepper
+    {
+    public:
+
+        using self_type = xfunction_stepper<F, CT...>;
+        using functor_type = typename std::remove_reference<F>::type;
+        using xfunction_type = xfunction<F, CT...>;
+
+        using value_type = typename xfunction_type::value_type;
+        using reference = typename xfunction_type::reference;
+        using pointer = typename xfunction_type::const_pointer;
+        using size_type = typename xfunction_type::size_type;
+        using difference_type = typename xfunction_type::difference_type;
+
+        using shape_type = typename xfunction_type::shape_type;
+
+        template <class requested_type>
+        using simd_return_type = xt_simd::simd_return_type<value_type, requested_type>;
+
+        template <class... St>
+        xfunction_stepper(const xfunction_type* func, St&&... st) noexcept;
+
+        void step(size_type dim);
+        void step_back(size_type dim);
+        void step(size_type dim, size_type n);
+        void step_back(size_type dim, size_type n);
+        void reset(size_type dim);
+        void reset_back(size_type dim);
+
+        void to_begin();
+        void to_end(layout_type l);
+
+        reference operator*() const;
+
+        template <class T>
+        simd_return_type<T> step_simd();
+
+        void step_leading();
+
+    private:
+
+        template <std::size_t... I>
+        reference deref_impl(std::index_sequence<I...>) const;
+
+        template <class T, std::size_t... I>
+        simd_return_type<T> step_simd_impl(std::index_sequence<I...>);
+
+        // Non-owning pointer back to the xfunction (provides the functor).
+        const xfunction_type* p_f;
+        std::tuple<typename std::decay_t<CT>::const_stepper...> m_st;
+    };
+
+    /****************************
+     * xfunction implementation *
+     ****************************/
+
+    /**
+     * @name Constructor
+     */
+    //@{
+    /**
+     * Constructs an xfunction applying the specified function to the given
+     * arguments.
+     * @param f the function to apply
+     * @param e the \ref xexpression arguments
+     */
+    template <class F, class... CT>
+    template <class Func, class... CTA, class U>
+    inline xfunction<F, CT...>::xfunction(Func&& f, CTA&&... e) noexcept
+        : m_e(std::forward<CTA>(e)...)  // m_e is declared before m_f, so it is initialized first
+        , m_f(std::forward<Func>(f))
+    {
+    }
+
+    /**
+     * Constructs an xfunction applying the specified function given by another
+     * xfunction with its arguments.
+     * @param xf the xfunction to apply
+     */
+    template <class F, class... CT>
+    template <class FA, class... CTA>
+    inline xfunction<F, CT...>::xfunction(xfunction<FA, CTA...> xf) noexcept
+        : m_e(xf.arguments())  // copies the source xfunction's argument tuple
+        , m_f(xf.functor())    // and its functor
+    {
+    }
+
+    //@}
+
+    /**
+     * @name Size and shape
+     */
+    //@{
+    /**
+     * Returns the number of dimensions of the function.
+     */
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::dimension() const noexcept -> size_type
+    {
+        // Use the cached broadcast shape when available; otherwise fold the
+        // maximum dimension over all operands.
+        return m_cache.is_initialized ? m_cache.shape.size() : compute_dimension();
+    }
+
+    template <class F, class... CT>
+    inline void xfunction<F, CT...>::compute_cached_shape() const
+    {
+        // Only meaningful for dynamic shapes; fixed shapes never need a cache.
+        static_assert(!detail::is_fixed<shape_type>::value, "Calling compute_cached_shape on fixed!");
+
+        // Broadcast all operand shapes into the cache and remember whether the
+        // broadcast was trivial; order matters: the shape buffer must be sized
+        // before broadcast_shape fills it.
+        m_cache.shape = uninitialized_shape<xindex_type_t<inner_shape_type>>(compute_dimension());
+        m_cache.is_trivial = broadcast_shape(m_cache.shape, false);
+        m_cache.is_initialized = true;
+    }
+
+    /**
+     * Returns the shape of the xfunction.
+     */
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::shape() const -> const inner_shape_type&
+    {
+        // For dynamic shapes, lazily compute and cache the broadcast shape on
+        // first access. The compile-time static_if avoids instantiating
+        // compute_cached_shape for fixed shapes (its static_assert would fire).
+        xtl::mpl::static_if<!detail::is_fixed<inner_shape_type>::value>(
+            [&](auto self)
+            {
+                if (!m_cache.is_initialized)
+                {
+                    self(this)->compute_cached_shape();
+                }
+            },
+            [](auto /*self*/) {}
+        );
+        return m_cache.shape;
+    }
+
+    /**
+     * Returns the layout_type of the xfunction.
+     */
+    template <class F, class... CT>
+    inline layout_type xfunction<F, CT...>::layout() const noexcept
+    {
+        // Combines the runtime layouts of all operands via compute_layout.
+        return layout_impl(std::make_index_sequence<sizeof...(CT)>());
+    }
+
+    template <class F, class... CT>
+    inline bool xfunction<F, CT...>::is_contiguous() const noexcept
+    {
+        // Contiguous iff the combined layout is not dynamic and every operand
+        // is itself contiguous.
+        auto all_contiguous = [](bool acc, const auto& arg)
+        {
+            return acc && arg.is_contiguous();
+        };
+        return layout() != layout_type::dynamic && accumulate(all_contiguous, true, m_e);
+    }
+
+    //@}
+
+    /**
+     * @name Data
+     */
+
+    /**
+     * Returns a constant reference to the element at the specified position in the function.
+     * @param args a list of indices specifying the position in the function. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the function.
+     */
+    template <class F, class... CT>
+    template <class... Args>
+    inline auto xfunction<F, CT...>::operator()(Args... args) const -> const_reference
+    {
+        // Bounds/dimension checks (when enabled) happen inside access_impl.
+        // The static cast prevents the compiler from instantiating the template methods with signed integers,
+        // leading to warning about signed/unsigned conversions in the deeper layers of the access methods
+        return access_impl(std::make_index_sequence<sizeof...(CT)>(), static_cast<size_type>(args)...);
+    }
+
+    /**
+     * @name Data
+     */
+
+    /**
+     * Returns a constant reference to the element at the specified position of the underlying
+     * contiguous storage of the function.
+     * @param index index to underlying flat storage.
+     */
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::flat(size_type index) const -> const_reference
+    {
+        // Forwards the flat index to every operand's storage, then applies the functor.
+        return data_element_impl(std::make_index_sequence<sizeof...(CT)>(), index);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the expression.
+     * @param args a list of indices specifying the position in the expression. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the expression, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class F, class... CT>
+    template <class... Args>
+    inline auto xfunction<F, CT...>::unchecked(Args... args) const -> const_reference
+    {
+        // No bounds or dimension checks: each operand's unchecked() is invoked directly.
+        // The static cast prevents the compiler from instantiating the template methods with signed integers,
+        // leading to warning about signed/unsigned conversions in the deeper layers of the access methods
+        return unchecked_impl(std::make_index_sequence<sizeof...(CT)>(), static_cast<size_type>(args)...);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the function.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater
+     * than the number of dimensions of the container.
+     */
+    template <class F, class... CT>
+    template <class It>
+    inline auto xfunction<F, CT...>::element(It first, It last) const -> const_reference
+    {
+        // Index-sequence access; the element index check (when enabled) is in the impl.
+        return element_access_impl(std::make_index_sequence<sizeof...(CT)>(), first, last);
+    }
+
+    //@}
+
+    /**
+     * @name Broadcasting
+     */
+    //@{
+    /**
+     * Broadcast the shape of the function to the specified parameter.
+     * @param shape the result shape
+     * @param reuse_cache boolean for reusing a previously computed shape
+     * @return a boolean indicating whether the broadcasting is trivial
+     */
+    template <class F, class... CT>
+    template <class S>
+    inline bool xfunction<F, CT...>::broadcast_shape(S& shape, bool reuse_cache) const
+    {
+        // Fast path: copy the previously broadcast shape out of the cache.
+        if (reuse_cache && m_cache.is_initialized)
+        {
+            std::copy(m_cache.shape.cbegin(), m_cache.shape.cend(), shape.begin());
+            return m_cache.is_trivial;
+        }
+        // Note: e.broadcast_shape must be evaluated for every operand even
+        // once triviality is lost, hence it is the left operand of &&.
+        auto merge = [&shape](bool trivial, auto&& e)
+        {
+            return e.broadcast_shape(shape) && trivial;
+        };
+        return accumulate(merge, true, m_e);
+    }
+
+    /**
+     * Checks whether the xfunction can be linearly assigned to an expression
+     * with the specified strides.
+     * @return a boolean indicating whether a linear assign is possible
+     */
+    template <class F, class... CT>
+    template <class S>
+    inline bool xfunction<F, CT...>::has_linear_assign(const S& strides) const noexcept
+    {
+        // Linear assignment is possible only if every operand supports it
+        // for the destination strides.
+        auto check = [&strides](bool acc, auto&& operand)
+        {
+            return acc && operand.has_linear_assign(strides);
+        };
+        return accumulate(check, true, m_e);
+    }
+
+    //@}
+
+    // Linear iterators are const only: dereferencing an xfunction computes a
+    // value through the functor, so no mutable access is possible. The
+    // non-const spellings simply forward to the const versions.
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::linear_begin() const noexcept -> const_linear_iterator
+    {
+        return linear_cbegin();
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::linear_end() const noexcept -> const_linear_iterator
+    {
+        return linear_cend();
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::linear_cbegin() const noexcept -> const_linear_iterator
+    {
+        // Aggregate the linear begin iterator of every operand.
+        auto f = [](const auto& e) noexcept
+        {
+            return xt::linear_begin(e);
+        };
+        return build_iterator(f, std::make_index_sequence<sizeof...(CT)>());
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::linear_cend() const noexcept -> const_linear_iterator
+    {
+        // Aggregate the linear end iterator of every operand.
+        auto f = [](const auto& e) noexcept
+        {
+            return xt::linear_end(e);
+        };
+        return build_iterator(f, std::make_index_sequence<sizeof...(CT)>());
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::linear_rbegin() const noexcept -> const_reverse_linear_iterator
+    {
+        return linear_crbegin();
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::linear_rend() const noexcept -> const_reverse_linear_iterator
+    {
+        return linear_crend();
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::linear_crbegin() const noexcept -> const_reverse_linear_iterator
+    {
+        // Reverse iteration wraps the forward iterators in std::reverse_iterator.
+        return const_reverse_linear_iterator(linear_cend());
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::linear_crend() const noexcept -> const_reverse_linear_iterator
+    {
+        return const_reverse_linear_iterator(linear_cbegin());
+    }
+
+    template <class F, class... CT>
+    template <class S>
+    inline auto xfunction<F, CT...>::stepper_begin(const S& shape) const noexcept -> const_stepper
+    {
+        // Build an aggregate stepper from one begin-stepper per operand,
+        // all broadcast to the given shape.
+        auto make_stepper = [&shape](const auto& e) noexcept
+        {
+            return e.stepper_begin(shape);
+        };
+        return build_stepper(make_stepper, std::make_index_sequence<sizeof...(CT)>());
+    }
+
+    template <class F, class... CT>
+    template <class S>
+    inline auto xfunction<F, CT...>::stepper_end(const S& shape, layout_type l) const noexcept -> const_stepper
+    {
+        // Build an aggregate stepper from one end-stepper per operand; the
+        // layout determines where "end" lies for each of them.
+        auto make_stepper = [&shape, l](const auto& e) noexcept
+        {
+            return e.stepper_end(shape, l);
+        };
+        return build_stepper(make_stepper, std::make_index_sequence<sizeof...(CT)>());
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::data_element(size_type i) const -> const_reference
+    {
+        // i-th element of each operand's flat storage, combined by the functor.
+        return data_element_impl(std::make_index_sequence<sizeof...(CT)>(), i);
+    }
+
+    // Implicit conversion to value_type; available only for 0-dimensional
+    // expressions (see the only_scalar constraint on the declaration).
+    template <class F, class... CT>
+    template <class UT, class>
+    inline xfunction<F, CT...>::operator value_type() const
+    {
+        return operator()();
+    }
+
+    template <class F, class... CT>
+    template <class align, class requested_type, std::size_t N>
+    inline auto xfunction<F, CT...>::load_simd(size_type i) const -> simd_return_type<requested_type>
+    {
+        // Loads a SIMD batch from each operand and applies the functor's simd_apply.
+        return load_simd_impl<align, requested_type, N>(std::make_index_sequence<sizeof...(CT)>(), i);
+    }
+
+    // Accessor for the tuple of operand expressions.
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::arguments() const noexcept -> const tuple_type&
+    {
+        return m_e;
+    }
+
+    // Accessor for the stored functor.
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::functor() const noexcept -> const functor_type&
+    {
+        return m_f;
+    }
+
+    // Folds the runtime layouts of all operands into a single layout_type.
+    template <class F, class... CT>
+    template <std::size_t... I>
+    inline layout_type xfunction<F, CT...>::layout_impl(std::index_sequence<I...>) const noexcept
+    {
+        return compute_layout(std::get<I>(m_e).layout()...);
+    }
+
+    // Checked access: forwards the indices to every operand and applies the functor.
+    template <class F, class... CT>
+    template <std::size_t... I, class... Args>
+    inline auto xfunction<F, CT...>::access_impl(std::index_sequence<I...>, Args... args) const
+        -> const_reference
+    {
+        XTENSOR_TRY(check_index(shape(), args...));
+        XTENSOR_CHECK_DIMENSION(shape(), args...);
+        return m_f(std::get<I>(m_e)(args...)...);
+    }
+
+    // Unchecked access: no index validation on any operand.
+    template <class F, class... CT>
+    template <std::size_t... I, class... Args>
+    inline auto xfunction<F, CT...>::unchecked_impl(std::index_sequence<I...>, Args... args) const
+        -> const_reference
+    {
+        return m_f(std::get<I>(m_e).unchecked(args...)...);
+    }
+
+    // Iterator-pair access: the same [first, last) range is passed to every operand.
+    template <class F, class... CT>
+    template <std::size_t... I, class It>
+    inline auto xfunction<F, CT...>::element_access_impl(std::index_sequence<I...>, It first, It last) const
+        -> const_reference
+    {
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        return m_f((std::get<I>(m_e).element(first, last))...);
+    }
+
+    // Flat access: the same flat index is passed to every operand's storage.
+    template <class F, class... CT>
+    template <std::size_t... I>
+    inline auto xfunction<F, CT...>::data_element_impl(std::index_sequence<I...>, size_type i) const
+        -> const_reference
+    {
+        return m_f((std::get<I>(m_e).data_element(i))...);
+    }
+
+    // SIMD access: batches loaded from each operand are combined via simd_apply.
+    template <class F, class... CT>
+    template <class align, class requested_type, std::size_t N, std::size_t... I>
+    inline auto xfunction<F, CT...>::load_simd_impl(std::index_sequence<I...>, size_type i) const
+    {
+        return m_f.simd_apply((std::get<I>(m_e).template load_simd<align, requested_type>(i))...);
+    }
+
+    // Applies f to every operand and aggregates the results into a const_stepper.
+    template <class F, class... CT>
+    template <class Func, std::size_t... I>
+    inline auto xfunction<F, CT...>::build_stepper(Func&& f, std::index_sequence<I...>) const noexcept
+        -> const_stepper
+    {
+        return const_stepper(this, f(std::get<I>(m_e))...);
+    }
+
+    // Applies f to every operand and aggregates the results into a linear iterator.
+    template <class F, class... CT>
+    template <class Func, std::size_t... I>
+    inline auto xfunction<F, CT...>::build_iterator(Func&& f, std::index_sequence<I...>) const noexcept
+    {
+        return const_linear_iterator(this, f(std::get<I>(m_e))...);
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction<F, CT...>::compute_dimension() const noexcept -> size_type
+    {
+        // The dimension of a broadcast expression is the maximum over its operands.
+        auto max_dim = [](size_type acc, auto&& operand) noexcept
+        {
+            return (std::max)(acc, operand.dimension());
+        };
+        return accumulate(max_dim, size_type(0), m_e);
+    }
+
+    /*************************************
+     * xfunction_iterator implementation *
+     *************************************/
+
+    // Stores the parent xfunction (for its functor) and one sub-iterator per operand.
+    template <class F, class... CT>
+    template <class... It>
+    inline xfunction_iterator<F, CT...>::xfunction_iterator(const xfunction_type* func, It&&... it) noexcept
+        : p_f(func)
+        , m_it(std::forward<It>(it)...)
+    {
+    }
+
+    // All arithmetic operators advance every sub-iterator in lockstep.
+    template <class F, class... CT>
+    inline auto xfunction_iterator<F, CT...>::operator++() -> self_type&
+    {
+        auto f = [](auto& it)
+        {
+            ++it;
+        };
+        for_each(f, m_it);
+        return *this;
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction_iterator<F, CT...>::operator--() -> self_type&
+    {
+        auto f = [](auto& it)
+        {
+            return --it;
+        };
+        for_each(f, m_it);
+        return *this;
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction_iterator<F, CT...>::operator+=(difference_type n) -> self_type&
+    {
+        auto f = [n](auto& it)
+        {
+            it += n;
+        };
+        for_each(f, m_it);
+        return *this;
+    }
+
+    template <class F, class... CT>
+    inline auto xfunction_iterator<F, CT...>::operator-=(difference_type n) -> self_type&
+    {
+        auto f = [n](auto& it)
+        {
+            it -= n;
+        };
+        for_each(f, m_it);
+        return *this;
+    }
+
+    // Distance is the maximum of the per-operand iterator distances
+    // (see tuple_max_diff below).
+    template <class F, class... CT>
+    inline auto xfunction_iterator<F, CT...>::operator-(const self_type& rhs) const -> difference_type
+    {
+        return tuple_max_diff(std::make_index_sequence<sizeof...(CT)>(), m_it, rhs.m_it);
+    }
+
+    // Dereference computes the functor applied to all dereferenced sub-iterators.
+    template <class F, class... CT>
+    inline auto xfunction_iterator<F, CT...>::operator*() const -> reference
+    {
+        return deref_impl(std::make_index_sequence<sizeof...(CT)>());
+    }
+
+    template <class F, class... CT>
+    inline bool xfunction_iterator<F, CT...>::equal(const self_type& rhs) const
+    {
+        // Optimization: no need to compare each subiterator since they all
+        // are incremented decremented together.
+        // find_if returns tuple_size when every sub-iterator is a dummy
+        // iterator; in that case fall back to comparing the first one.
+        constexpr std::size_t temp = xtl::mpl::find_if<is_not_xdummy_iterator, data_type>::value;
+        constexpr std::size_t index = (temp == std::tuple_size<data_type>::value) ? 0 : temp;
+        return std::get<index>(m_it) == std::get<index>(rhs.m_it);
+    }
+
+    template <class F, class... CT>
+    inline bool xfunction_iterator<F, CT...>::less_than(const self_type& rhs) const
+    {
+        // Optimization: no need to compare each subiterator since they all
+        // are incremented decremented together.
+        // Same representative-subiterator selection as in equal().
+        constexpr std::size_t temp = xtl::mpl::find_if<is_not_xdummy_iterator, data_type>::value;
+        constexpr std::size_t index = (temp == std::tuple_size<data_type>::value) ? 0 : temp;
+        return std::get<index>(m_it) < std::get<index>(rhs.m_it);
+    }
+
+    // Applies the parent xfunction's functor to all dereferenced sub-iterators.
+    template <class F, class... CT>
+    template <std::size_t... I>
+    inline auto xfunction_iterator<F, CT...>::deref_impl(std::index_sequence<I...>) const -> reference
+    {
+        return (p_f->m_f)(*std::get<I>(m_it)...);
+    }
+
+    // Computes the per-operand iterator differences and returns the largest
+    // one (clamped below by 0 via the accumulator's initial value).
+    template <class F, class... CT>
+    template <std::size_t... I>
+    inline auto xfunction_iterator<F, CT...>::tuple_max_diff(
+        std::index_sequence<I...>,
+        const data_type& lhs,
+        const data_type& rhs
+    ) const -> difference_type
+    {
+        auto diff = std::make_tuple((std::get<I>(lhs) - std::get<I>(rhs))...);
+        auto func = [](difference_type n, auto&& v)
+        {
+            return (std::max)(n, v);
+        };
+        return accumulate(func, difference_type(0), diff);
+    }
+
+    // Free comparison operators; the remaining relational operators are
+    // provided by xtl::xrandom_access_iterator_base.
+    template <class F, class... CT>
+    inline bool operator==(const xfunction_iterator<F, CT...>& it1, const xfunction_iterator<F, CT...>& it2)
+    {
+        return it1.equal(it2);
+    }
+
+    template <class F, class... CT>
+    inline bool operator<(const xfunction_iterator<F, CT...>& it1, const xfunction_iterator<F, CT...>& it2)
+    {
+        return it1.less_than(it2);
+    }
+
+    /************************************
+     * xfunction_stepper implementation *
+     ************************************/
+
+    // Stores the parent xfunction (for its functor) and one sub-stepper per operand.
+    template <class F, class... CT>
+    template <class... St>
+    inline xfunction_stepper<F, CT...>::xfunction_stepper(const xfunction_type* func, St&&... st) noexcept
+        : p_f(func)
+        , m_st(std::forward<St>(st)...)
+    {
+    }
+
+    // Every stepping/reset operation is broadcast to all operand sub-steppers.
+    template <class F, class... CT>
+    inline void xfunction_stepper<F, CT...>::step(size_type dim)
+    {
+        auto f = [dim](auto& st)
+        {
+            st.step(dim);
+        };
+        for_each(f, m_st);
+    }
+
+    template <class F, class... CT>
+    inline void xfunction_stepper<F, CT...>::step_back(size_type dim)
+    {
+        auto f = [dim](auto& st)
+        {
+            st.step_back(dim);
+        };
+        for_each(f, m_st);
+    }
+
+    template <class F, class... CT>
+    inline void xfunction_stepper<F, CT...>::step(size_type dim, size_type n)
+    {
+        auto f = [dim, n](auto& st)
+        {
+            st.step(dim, n);
+        };
+        for_each(f, m_st);
+    }
+
+    template <class F, class... CT>
+    inline void xfunction_stepper<F, CT...>::step_back(size_type dim, size_type n)
+    {
+        auto f = [dim, n](auto& st)
+        {
+            st.step_back(dim, n);
+        };
+        for_each(f, m_st);
+    }
+
+    template <class F, class... CT>
+    inline void xfunction_stepper<F, CT...>::reset(size_type dim)
+    {
+        auto f = [dim](auto& st)
+        {
+            st.reset(dim);
+        };
+        for_each(f, m_st);
+    }
+
+    template <class F, class... CT>
+    inline void xfunction_stepper<F, CT...>::reset_back(size_type dim)
+    {
+        auto f = [dim](auto& st)
+        {
+            st.reset_back(dim);
+        };
+        for_each(f, m_st);
+    }
+
+    template <class F, class... CT>
+    inline void xfunction_stepper<F, CT...>::to_begin()
+    {
+        auto f = [](auto& st)
+        {
+            st.to_begin();
+        };
+        for_each(f, m_st);
+    }
+
+    // The layout parameter tells each sub-stepper where its end position lies.
+    template <class F, class... CT>
+    inline void xfunction_stepper<F, CT...>::to_end(layout_type l)
+    {
+        auto f = [l](auto& st)
+        {
+            st.to_end(l);
+        };
+        for_each(f, m_st);
+    }
+
+    // Dereference computes the functor applied to all dereferenced sub-steppers.
+    template <class F, class... CT>
+    inline auto xfunction_stepper<F, CT...>::operator*() const -> reference
+    {
+        return deref_impl(std::make_index_sequence<sizeof...(CT)>());
+    }
+
+    template <class F, class... CT>
+    template <std::size_t... I>
+    inline auto xfunction_stepper<F, CT...>::deref_impl(std::index_sequence<I...>) const -> reference
+    {
+        return (p_f->m_f)(*std::get<I>(m_st)...);
+    }
+
+    // Advances each sub-stepper by one SIMD batch and combines the loaded
+    // batches via the functor's simd_apply.
+    template <class F, class... CT>
+    template <class T, std::size_t... I>
+    inline auto xfunction_stepper<F, CT...>::step_simd_impl(std::index_sequence<I...>) -> simd_return_type<T>
+    {
+        return (p_f->m_f.simd_apply)(std::get<I>(m_st).template step_simd<T>()...);
+    }
+
+    template <class F, class... CT>
+    template <class T>
+    inline auto xfunction_stepper<F, CT...>::step_simd() -> simd_return_type<T>
+    {
+        return step_simd_impl<T>(std::make_index_sequence<sizeof...(CT)>());
+    }
+
+    template <class F, class... CT>
+    inline void xfunction_stepper<F, CT...>::step_leading()
+    {
+        // Advance every operand's sub-stepper along the leading dimension.
+        for_each(
+            [](auto&& sub_stepper)
+            {
+                sub_stepper.step_leading();
+            },
+            m_st
+        );
+    }
+}
+
+#endif

+ 1649 - 0
3rd/numpy/include/xtensor/xfunctor_view.hpp

@@ -0,0 +1,1649 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_FUNCTOR_VIEW_HPP
+#define XTENSOR_FUNCTOR_VIEW_HPP
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xproxy_wrapper.hpp>
+
+#include "xaccessible.hpp"
+#include "xarray.hpp"
+#include "xexpression.hpp"
+#include "xiterator.hpp"
+#include "xsemantic.hpp"
+#include "xtensor.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+    /**
+     * @defgroup xt_xfunctor_view
+     *
+     * Functor views applying a functor to each element of an expression.
+     * Defined in ``xtensor/xfunctor_view.hpp``
+     */
+
+    /************************************************
+     * xfunctor_view and xfunctor_adaptor extension *
+     ************************************************/
+
+    namespace extension
+    {
+        // Primary template: selects the extension base class of
+        // xfunctor_view / xfunctor_adaptor depending on the expression tag.
+        template <class Tag, class F, class CT>
+        struct xfunctor_view_base_impl;
+
+        // Plain xtensor expressions get an empty extension base.
+        template <class F, class CT>
+        struct xfunctor_view_base_impl<xtensor_expression_tag, F, CT>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        // Dispatches on the expression tag of the wrapped closure type CT.
+        template <class F, class CT>
+        struct xfunctor_view_base : xfunctor_view_base_impl<xexpression_tag_t<CT>, F, CT>
+        {
+        };
+
+        template <class F, class CT>
+        using xfunctor_view_base_t = typename xfunctor_view_base<F, CT>::type;
+    }
+
+    /*************************************
+     * xfunctor_applier_base declaration *
+     *************************************/
+
+    // Forward declarations for the iterator and stepper adaptors defined
+    // later in this file.
+    template <class F, class IT>
+    class xfunctor_iterator;
+
+    template <class F, class ST>
+    class xfunctor_stepper;
+
+    // CRTP base implementing the behavior shared by xfunctor_view and
+    // xfunctor_adaptor: every access, iteration and stepper operation is
+    // forwarded to the wrapped expression m_e, with each element passed
+    // through the functor m_functor.
+    template <class D>
+    class xfunctor_applier_base : private xaccessible<D>
+    {
+    public:
+
+        using self_type = xfunctor_applier_base<D>;
+        using inner_types = xcontainer_inner_types<D>;
+        using xexpression_type = typename inner_types::xexpression_type;
+        using undecay_expression = typename inner_types::undecay_expression;
+        using functor_type = typename inner_types::functor_type;
+        using accessible_base = xaccessible<D>;
+
+        using extension_base = extension::xfunctor_view_base_t<functor_type, undecay_expression>;
+        using expression_tag = typename extension_base::expression_tag;
+
+        using value_type = typename functor_type::value_type;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+        using pointer = typename functor_type::pointer;
+        using const_pointer = typename functor_type::const_pointer;
+        using size_type = typename inner_types::size_type;
+        using difference_type = typename xexpression_type::difference_type;
+
+        // Strides types are taken from the underlying expression when it has
+        // strides, otherwise derived from the shape type.
+        using shape_type = typename xexpression_type::shape_type;
+        using strides_type = xtl::mpl::eval_if_t<
+            has_strides<xexpression_type>,
+            detail::expr_strides_type<xexpression_type>,
+            get_strides_type<shape_type>>;
+        using backstrides_type = xtl::mpl::eval_if_t<
+            has_strides<xexpression_type>,
+            detail::expr_backstrides_type<xexpression_type>,
+            get_strides_type<shape_type>>;
+
+        using inner_shape_type = typename xexpression_type::inner_shape_type;
+        using inner_strides_type = xtl::mpl::eval_if_t<
+            has_strides<xexpression_type>,
+            detail::expr_inner_strides_type<xexpression_type>,
+            get_strides_type<shape_type>>;
+        using inner_backstrides_type = xtl::mpl::eval_if_t<
+            has_strides<xexpression_type>,
+            detail::expr_inner_backstrides_type<xexpression_type>,
+            get_strides_type<shape_type>>;
+
+        using bool_load_type = xt::bool_load_type<value_type>;
+
+        // Layout properties mirror those of the wrapped expression.
+        static constexpr layout_type static_layout = xexpression_type::static_layout;
+        static constexpr bool contiguous_layout = xexpression_type::contiguous_layout;
+
+        // Steppers and iterators wrap those of the underlying expression,
+        // applying the functor on dereference.
+        using stepper = xfunctor_stepper<functor_type, typename xexpression_type::stepper>;
+        using const_stepper = xfunctor_stepper<const functor_type, typename xexpression_type::const_stepper>;
+
+        template <layout_type L>
+        using layout_iterator = xfunctor_iterator<functor_type, typename xexpression_type::template layout_iterator<L>>;
+        template <layout_type L>
+        using const_layout_iterator = xfunctor_iterator<
+            const functor_type,
+            typename xexpression_type::template const_layout_iterator<L>>;
+
+        template <layout_type L>
+        using reverse_layout_iterator = xfunctor_iterator<
+            functor_type,
+            typename xexpression_type::template reverse_layout_iterator<L>>;
+        template <layout_type L>
+        using const_reverse_layout_iterator = xfunctor_iterator<
+            const functor_type,
+            typename xexpression_type::template const_reverse_layout_iterator<L>>;
+
+        template <class S, layout_type L>
+        using broadcast_iterator = xfunctor_iterator<functor_type, xiterator<typename xexpression_type::stepper, S, L>>;
+        template <class S, layout_type L>
+        using const_broadcast_iterator = xfunctor_iterator<
+            functor_type,
+            xiterator<typename xexpression_type::const_stepper, S, L>>;
+
+        template <class S, layout_type L>
+        using reverse_broadcast_iterator = xfunctor_iterator<
+            functor_type,
+            typename xexpression_type::template reverse_broadcast_iterator<S, L>>;
+        template <class S, layout_type L>
+        using const_reverse_broadcast_iterator = xfunctor_iterator<
+            functor_type,
+            typename xexpression_type::template const_reverse_broadcast_iterator<S, L>>;
+
+        using linear_iterator = xfunctor_iterator<functor_type, typename xexpression_type::linear_iterator>;
+        using const_linear_iterator = xfunctor_iterator<const functor_type, typename xexpression_type::const_linear_iterator>;
+        using reverse_linear_iterator = xfunctor_iterator<functor_type, typename xexpression_type::reverse_linear_iterator>;
+        using const_reverse_linear_iterator = xfunctor_iterator<
+            const functor_type,
+            typename xexpression_type::const_reverse_linear_iterator>;
+
+        using iterator = xfunctor_iterator<functor_type, typename xexpression_type::iterator>;
+        using const_iterator = xfunctor_iterator<const functor_type, typename xexpression_type::const_iterator>;
+        using reverse_iterator = xfunctor_iterator<functor_type, typename xexpression_type::reverse_iterator>;
+        using const_reverse_iterator = xfunctor_iterator<const functor_type, typename xexpression_type::const_reverse_iterator>;
+
+        // Constructors: wrap an expression, optionally with an explicit
+        // functor instance (otherwise the functor is default-constructed).
+        explicit xfunctor_applier_base(undecay_expression) noexcept;
+
+        template <class Func, class E>
+        xfunctor_applier_base(Func&&, E&&) noexcept;
+
+        size_type size() const noexcept;
+        const inner_shape_type& shape() const noexcept;
+        const inner_strides_type& strides() const noexcept;
+        const inner_backstrides_type& backstrides() const noexcept;
+        using accessible_base::dimension;
+        using accessible_base::shape;
+
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+
+        // Element access: each overload forwards to the wrapped expression
+        // and applies the functor to the result.
+        template <class... Args>
+        reference operator()(Args... args);
+
+        template <class... Args>
+        reference unchecked(Args... args);
+
+        template <class IT>
+        reference element(IT first, IT last);
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+
+        template <class IT>
+        const_reference element(IT first, IT last) const;
+
+        using accessible_base::at;
+        using accessible_base::operator[];
+        using accessible_base::back;
+        using accessible_base::front;
+        using accessible_base::periodic;
+
+        using accessible_base::in_bounds;
+
+        xexpression_type& expression() noexcept;
+        const xexpression_type& expression() const noexcept;
+
+        template <class S>
+        bool broadcast_shape(S& shape, bool reuse_cache = false) const;
+
+        template <class S>
+        bool has_linear_assign(const S& strides) const;
+
+        // Flat data access; defined inline so the trailing return type can
+        // SFINAE on whether the wrapped expression provides data_element.
+        template <class FCT = functor_type>
+        auto data_element(size_type i)
+            -> decltype(std::declval<FCT>()(std::declval<undecay_expression>().data_element(i)))
+        {
+            return m_functor(m_e.data_element(i));
+        }
+
+        template <class FCT = functor_type>
+        auto data_element(size_type i) const
+            -> decltype(std::declval<FCT>()(std::declval<const undecay_expression>().data_element(i)))
+        {
+            return m_functor(m_e.data_element(i));
+        }
+
+        template <class FCT = functor_type>
+        auto flat(size_type i) -> decltype(std::declval<FCT>()(std::declval<undecay_expression>().flat(i)))
+        {
+            return m_functor(m_e.flat(i));
+        }
+
+        template <class FCT = functor_type>
+        auto flat(size_type i) const
+            -> decltype(std::declval<FCT>()(std::declval<const undecay_expression>().flat(i)))
+        {
+            return m_functor(m_e.flat(i));
+        }
+
+        // The following functions are defined inline because otherwise signatures
+        // don't match on GCC.
+        template <
+            class align,
+            class requested_type = typename xexpression_type::value_type,
+            std::size_t N = xt_simd::simd_traits<requested_type>::size,
+            class FCT = functor_type>
+        auto load_simd(size_type i) const
+            -> decltype(std::declval<FCT>().template proxy_simd_load<align, requested_type, N>(
+                std::declval<undecay_expression>(),
+                i
+            ))
+        {
+            return m_functor.template proxy_simd_load<align, requested_type, N>(m_e, i);
+        }
+
+        template <class align, class simd, class FCT = functor_type>
+        auto store_simd(size_type i, const simd& e)
+            -> decltype(std::declval<FCT>()
+                            .template proxy_simd_store<align>(std::declval<undecay_expression>(), i, e))
+        {
+            return m_functor.template proxy_simd_store<align>(m_e, i, e);
+        }
+
+        // Iterator factories, parameterized on the traversal layout.
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto begin() noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto end() noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto begin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto end() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto cbegin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto cend() const noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto rbegin() noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto rend() noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto rbegin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto rend() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto crbegin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        auto crend() const noexcept;
+
+        // Broadcast iterators over an arbitrary shape S.
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        broadcast_iterator<S, L> begin(const S& shape) noexcept;
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        broadcast_iterator<S, L> end(const S& shape) noexcept;
+
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_broadcast_iterator<S, L> begin(const S& shape) const noexcept;
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_broadcast_iterator<S, L> end(const S& shape) const noexcept;
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_broadcast_iterator<S, L> cbegin(const S& shape) const noexcept;
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_broadcast_iterator<S, L> cend(const S& shape) const noexcept;
+
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        reverse_broadcast_iterator<S, L> rbegin(const S& shape) noexcept;
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        reverse_broadcast_iterator<S, L> rend(const S& shape) noexcept;
+
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_reverse_broadcast_iterator<S, L> rbegin(const S& shape) const noexcept;
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_reverse_broadcast_iterator<S, L> rend(const S& shape) const noexcept;
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_reverse_broadcast_iterator<S, L> crbegin(const S& shape) const noexcept;
+        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_reverse_broadcast_iterator<S, L> crend(const S& shape) const noexcept;
+
+        // Linear (flat-order) iterators.
+        linear_iterator linear_begin() noexcept;
+        linear_iterator linear_end() noexcept;
+
+        const_linear_iterator linear_begin() const noexcept;
+        const_linear_iterator linear_end() const noexcept;
+        const_linear_iterator linear_cbegin() const noexcept;
+        const_linear_iterator linear_cend() const noexcept;
+
+        reverse_linear_iterator linear_rbegin() noexcept;
+        reverse_linear_iterator linear_rend() noexcept;
+
+        const_reverse_linear_iterator linear_rbegin() const noexcept;
+        const_reverse_linear_iterator linear_rend() const noexcept;
+        const_reverse_linear_iterator linear_crbegin() const noexcept;
+        const_reverse_linear_iterator linear_crend() const noexcept;
+
+        template <class S>
+        stepper stepper_begin(const S& shape) noexcept;
+        template <class S>
+        stepper stepper_end(const S& shape, layout_type l) noexcept;
+        template <class S>
+        const_stepper stepper_begin(const S& shape) const noexcept;
+        template <class S>
+        const_stepper stepper_end(const S& shape, layout_type l) const noexcept;
+
+    protected:
+
+        // Closure on the underlying expression (value or reference,
+        // depending on undecay_expression).
+        undecay_expression m_e;
+        // Functor applied to every accessed element.
+        functor_type m_functor;
+
+    private:
+
+        friend class xaccessible<D>;
+        friend class xconst_accessible<D>;
+    };
+
+    // xfunctor_applier_base exposes a SIMD interface for T only when T has a
+    // SIMD type, the underlying expression has a SIMD interface, and the
+    // applier itself provides the required load/store members.
+    template <class D, class T>
+    struct has_simd_interface<xfunctor_applier_base<D>, T>
+        : xtl::conjunction<
+              has_simd_type<T>,
+              has_simd_interface<typename xfunctor_applier_base<D>::xexpression_type>,
+              detail::has_simd_interface_impl<xfunctor_applier_base<D>, T>>
+    {
+    };
+
+    /********************************
+     * xfunctor_view_temporary_type *
+     ********************************/
+
+    namespace detail
+    {
+        // TODO replace with xexpression_for_shape ...
+        // Default: dynamic-rank shapes map to an xarray temporary.
+        template <class F, class S, layout_type L>
+        struct functorview_temporary_type_impl
+        {
+            using type = xarray<typename F::value_type, L>;
+        };
+
+        // Fixed-rank shapes (std::array) map to an xtensor temporary of the
+        // same rank.
+        template <class F, class T, std::size_t N, layout_type L>
+        struct functorview_temporary_type_impl<F, std::array<T, N>, L>
+        {
+            using type = xtensor<typename F::value_type, N, L>;
+        };
+    }
+
+    // Temporary container type used when assigning through a functor view:
+    // chosen from the wrapped expression's shape type and static layout.
+    template <class F, class E>
+    struct xfunctor_view_temporary_type
+    {
+        using type = typename detail::functorview_temporary_type_impl<F, typename E::shape_type, E::static_layout>::type;
+    };
+
+    /*****************************
+     * xfunctor_view declaration *
+     *****************************/
+
+    template <class F, class CT>
+    class xfunctor_view;
+
+    // Inner types for xfunctor_view: reference types are whatever the functor
+    // returns when applied to the wrapped expression's references (view
+    // semantics).
+    template <class F, class CT>
+    struct xcontainer_inner_types<xfunctor_view<F, CT>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using undecay_expression = CT;
+        using functor_type = std::decay_t<F>;
+        using reference = decltype(std::declval<F>()(std::declval<xexpression_type>()()));
+        using const_reference = decltype(std::declval<F>()(std::declval<const xexpression_type>()()));
+        using size_type = typename xexpression_type::size_type;
+        using temporary_type = typename xfunctor_view_temporary_type<F, xexpression_type>::type;
+    };
+
+    // xfunctor_view inherits its SIMD capability from xfunctor_applier_base.
+    template <class F, class CT, class T>
+    struct has_simd_interface<xfunctor_view<F, CT>, T>
+        : has_simd_interface<xfunctor_applier_base<xfunctor_view<F, CT>>, T>
+    {
+    };
+
+    /**
+     * View of an xexpression.
+     *
+     * The xt::xfunctor_view class is an expression addressing its elements by applying a functor to the
+     * corresponding element of an underlying expression.
+     * Unlike e.g. xgenerator, an xt::xfunctor_view is an lvalue.
+     * It is used e.g. to access real and imaginary parts of complex expressions.
+     *
+     * xt::xfunctor_view has a view semantics and can be used on any expression.
+     * For a similar feature with a container semantics, one can use xt::xfunctor_adaptor.
+     *
+     * xt::xfunctor_view is not meant to be used directly, but through helper functions such
+     * as xt::real or xt::imag.
+     *
+     * @ingroup xt_xfunctor_view
+     * @tparam F the functor type to be applied to the elements of specified expression.
+     * @tparam CT the closure type of the xt::xexpression type underlying this view
+     * @see xt::real, xt::imag
+     */
+    template <class F, class CT>
+    class xfunctor_view : public xfunctor_applier_base<xfunctor_view<F, CT>>,
+                          public xview_semantic<xfunctor_view<F, CT>>,
+                          public extension::xfunctor_view_base_t<F, CT>
+    {
+    public:
+
+        using self_type = xfunctor_view<F, CT>;
+        using semantic_base = xview_semantic<self_type>;
+
+        // constructors
+        using xfunctor_applier_base<self_type>::xfunctor_applier_base;
+
+        template <class E>
+        self_type& operator=(const xexpression<E>& e);
+
+        template <class E>
+        disable_xexpression<E, self_type>& operator=(const E& e);
+
+        // Rebinds the view to another underlying expression type, keeping
+        // the same functor.
+        template <class E>
+        using rebind_t = xfunctor_view<F, E>;
+
+        template <class E>
+        rebind_t<E> build_functor_view(E&& e) const;
+
+    private:
+
+        using temporary_type = typename xcontainer_inner_types<self_type>::temporary_type;
+        void assign_temporary_impl(temporary_type&& tmp);
+        friend class xview_semantic<self_type>;
+        friend class xaccessible<self_type>;
+    };
+
+    /********************************
+     * xfunctor_adaptor declaration *
+     ********************************/
+
+    template <class F, class CT>
+    class xfunctor_adaptor;
+
+    // Inner types for xfunctor_adaptor: reference types come from the
+    // functor's own reference typedefs (container semantics).
+    template <class F, class CT>
+    struct xcontainer_inner_types<xfunctor_adaptor<F, CT>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using undecay_expression = CT;
+        using functor_type = std::decay_t<F>;
+        using reference = typename functor_type::reference;
+        using const_reference = typename functor_type::const_reference;
+        using size_type = typename xexpression_type::size_type;
+        using temporary_type = typename xfunctor_view_temporary_type<F, xexpression_type>::type;
+    };
+
+    // xfunctor_adaptor inherits its SIMD capability from xfunctor_applier_base.
+    template <class F, class CT, class T>
+    struct has_simd_interface<xfunctor_adaptor<F, CT>, T>
+        : has_simd_interface<xfunctor_applier_base<xfunctor_adaptor<F, CT>>, T>
+    {
+    };
+
+    /**
+     * Adapt a container with a functor, forwarding methods such as resize / reshape.
+     *
+     * xt::xfunctor_adaptor has a container semantics and can only be used with containers.
+     * For a similar feature with a view semantics, one can use xt::xfunctor_view.
+     *
+     * @ingroup xt_xfunctor_view
+     * @tparam F the functor type to be applied to the elements of specified expression.
+     * @tparam CT the closure type of the xt::xexpression type underlying this view
+     * @see xt::xfunctor_view
+     */
+    template <class F, class CT>
+    class xfunctor_adaptor : public xfunctor_applier_base<xfunctor_adaptor<F, CT>>,
+                             public xcontainer_semantic<xfunctor_adaptor<F, CT>>,
+                             public extension::xfunctor_view_base_t<F, CT>
+    {
+    public:
+
+        using self_type = xfunctor_adaptor<F, CT>;
+        using semantic_base = xcontainer_semantic<self_type>;
+        using xexpression_type = std::decay_t<CT>;
+        using base_type = xfunctor_applier_base<self_type>;
+        using shape_type = typename base_type::shape_type;
+        using strides_type = typename xexpression_type::strides_type;
+        // constructors
+        using xfunctor_applier_base<self_type>::xfunctor_applier_base;
+
+        template <class E>
+        self_type& operator=(const xexpression<E>& e);
+
+        template <class E>
+        disable_xexpression<E, self_type>& operator=(const E& e);
+
+        // Container operations forwarded to the underlying container.
+        template <class S = shape_type>
+        auto resize(S&& shape, bool force = false);
+
+        template <class S = shape_type>
+        auto resize(S&& shape, layout_type l);
+
+        template <class S = shape_type>
+        auto resize(S&& shape, const strides_type& strides);
+
+        template <class S = shape_type>
+        auto& reshape(S&& shape, layout_type layout = base_type::static_layout) &;
+
+    private:
+
+        using temporary_type = typename xcontainer_inner_types<self_type>::temporary_type;
+        void assign_temporary_impl(temporary_type&& tmp);
+        friend class xcontainer_semantic<self_type>;
+        friend class xaccessible<self_type>;
+    };
+
+    /*********************************
+     * xfunctor_iterator declaration *
+     *********************************/
+
+    // Computes the reference and pointer types associated with a (possibly
+    // proxy) reference type R.
+    template <class R>
+    struct xproxy_inner_types
+    {
+        using reference = R;
+        using pointer = std::add_pointer_t<std::remove_reference_t<R>>;
+    };
+
+    namespace detail
+    {
+        // Result type of invoking F on the dereferenced value of IT.
+        template <class F, class IT>
+        struct xfunctor_invoker
+        {
+            using type = decltype(std::declval<F>()(*(std::declval<IT>())));
+        };
+
+        template <class F, class IT>
+        using xfunctor_invoker_t = typename xfunctor_invoker<F, IT>::type;
+    }
+
+    // Random-access iterator adaptor: wraps an iterator IT and a pointer to a
+    // functor F, applying the functor to the underlying value on dereference.
+    template <class F, class IT>
+    class xfunctor_iterator : public xtl::xrandom_access_iterator_base<
+                                  xfunctor_iterator<F, IT>,
+                                  typename std::decay_t<F>::value_type,
+                                  typename std::iterator_traits<IT>::difference_type,
+                                  typename xproxy_inner_types<detail::xfunctor_invoker_t<F, IT>>::pointer,
+                                  typename xproxy_inner_types<detail::xfunctor_invoker_t<F, IT>>::reference>
+    {
+    public:
+
+        using functor_type = F;
+        using subiterator_traits = std::iterator_traits<IT>;
+
+        using proxy_inner = xproxy_inner_types<detail::xfunctor_invoker_t<F, IT>>;
+        using value_type = typename functor_type::value_type;
+        using reference = typename proxy_inner::reference;
+        using pointer = typename proxy_inner::pointer;
+        using difference_type = typename subiterator_traits::difference_type;
+        using iterator_category = typename subiterator_traits::iterator_category;
+
+        using self_type = xfunctor_iterator<F, IT>;
+
+        xfunctor_iterator(const IT&, functor_type*);
+
+        self_type& operator++();
+        self_type& operator--();
+
+        self_type& operator+=(difference_type n);
+        self_type& operator-=(difference_type n);
+
+        difference_type operator-(xfunctor_iterator rhs) const;
+
+        reference operator*() const;
+        pointer operator->() const;
+
+        bool equal(const xfunctor_iterator& rhs) const;
+        bool less_than(const xfunctor_iterator& rhs) const;
+
+    private:
+
+        // Underlying iterator and non-owning pointer to the functor.
+        IT m_it;
+        functor_type* p_functor;
+    };
+
+    // Free comparison operators for xfunctor_iterator (definitions appear
+    // later in the file).
+    template <class F, class IT>
+    bool operator==(const xfunctor_iterator<F, IT>& lhs, const xfunctor_iterator<F, IT>& rhs);
+
+    template <class F, class IT>
+    bool operator<(const xfunctor_iterator<F, IT>& lhs, const xfunctor_iterator<F, IT>& rhs);
+
+    /********************************
+     * xfunctor_stepper declaration *
+     ********************************/
+
+    // Stepper adaptor: wraps a stepper ST and a pointer to a functor F,
+    // applying the functor to the underlying value on dereference while
+    // forwarding all stepping operations.
+    template <class F, class ST>
+    class xfunctor_stepper
+    {
+    public:
+
+        using functor_type = F;
+
+        using proxy_inner = xproxy_inner_types<detail::xfunctor_invoker_t<F, ST>>;
+        using value_type = typename functor_type::value_type;
+        using reference = typename proxy_inner::reference;
+        using pointer = std::remove_reference_t<reference>*;
+        using size_type = typename ST::size_type;
+        using difference_type = typename ST::difference_type;
+
+        using shape_type = typename ST::shape_type;
+
+        xfunctor_stepper() = default;
+        xfunctor_stepper(const ST&, functor_type*);
+
+        reference operator*() const;
+
+        void step(size_type dim);
+        void step_back(size_type dim);
+        void step(size_type dim, size_type n);
+        void step_back(size_type dim, size_type n);
+        void reset(size_type dim);
+        void reset_back(size_type dim);
+
+        void to_begin();
+        void to_end(layout_type);
+
+    private:
+
+        // Underlying stepper and non-owning pointer to the functor.
+        ST m_stepper;
+        functor_type* p_functor;
+    };
+
+    /****************************************
+     * xfunctor_applier_base implementation *
+     ****************************************/
+
+    /**
+     * @name Constructors
+     */
+    //@{
+
+    /**
+     * Constructs an xfunctor_applier_base expression wrapping the specified xt::xexpression.
+     * The functor is default-constructed.
+     *
+     * @param e the underlying expression
+     */
+    template <class D>
+    inline xfunctor_applier_base<D>::xfunctor_applier_base(undecay_expression e) noexcept
+        : m_e(e)
+        , m_functor(functor_type())
+    {
+    }
+
+    /**
+     * Constructs an xfunctor_applier_base expression wrapping the specified xt::xexpression.
+     *
+     * @param func the functor to be applied to the elements of the underlying expression.
+     * @param e the underlying expression
+     */
+    template <class D>
+    template <class Func, class E>
+    inline xfunctor_applier_base<D>::xfunctor_applier_base(Func&& func, E&& e) noexcept
+        : m_e(std::forward<E>(e))
+        , m_functor(std::forward<Func>(func))
+    {
+    }
+
+    //@}
+
+    /**
+     * @name Size and shape
+     */
+
+    /**
+     * Returns the size (number of elements) of the underlying expression.
+     */
+    template <class D>
+    inline auto xfunctor_applier_base<D>::size() const noexcept -> size_type
+    {
+        return m_e.size();
+    }
+
+    /**
+     * Returns the shape of the underlying expression.
+     */
+    template <class D>
+    inline auto xfunctor_applier_base<D>::shape() const noexcept -> const inner_shape_type&
+    {
+        return m_e.shape();
+    }
+
+    /**
+     * Returns the strides of the underlying expression.
+     */
+    template <class D>
+    inline auto xfunctor_applier_base<D>::strides() const noexcept -> const inner_strides_type&
+    {
+        return m_e.strides();
+    }
+
+    /**
+     * Returns the backstrides of the underlying expression.
+     */
+    template <class D>
+    inline auto xfunctor_applier_base<D>::backstrides() const noexcept -> const inner_backstrides_type&
+    {
+        return m_e.backstrides();
+    }
+
+    /**
+     * Returns the layout_type of the underlying expression.
+     */
+    template <class D>
+    inline layout_type xfunctor_applier_base<D>::layout() const noexcept
+    {
+        return m_e.layout();
+    }
+
+    /**
+     * Returns true if the underlying expression reports a contiguous layout.
+     */
+    template <class D>
+    inline bool xfunctor_applier_base<D>::is_contiguous() const noexcept
+    {
+        return m_e.is_contiguous();
+    }
+
+    //@}
+
+    /**
+     * @name Data
+     */
+
+    /**
+     * Returns a reference to the element at the specified position in the expression,
+     * obtained by applying the functor to the corresponding element of the
+     * underlying expression.
+     * @param args a list of indices specifying the position in the function. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the expression.
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xfunctor_applier_base<D>::operator()(Args... args) -> reference
+    {
+        XTENSOR_TRY(check_index(shape(), args...));
+        XTENSOR_CHECK_DIMENSION(shape(), args...);
+        return m_functor(m_e(args...));
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the expression.
+     * No bounds checking is performed.
+     * @param args a list of indices specifying the position in the expression. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the expression, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xfunctor_applier_base<D>::unchecked(Args... args) -> reference
+    {
+        return m_functor(m_e.unchecked(args...));
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the expression,
+     * obtained by applying the functor to the corresponding underlying element.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater
+     * than the number of dimensions of the function.
+     */
+    template <class D>
+    template <class IT>
+    inline auto xfunctor_applier_base<D>::element(IT first, IT last) -> reference
+    {
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        return m_functor(m_e.element(first, last));
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the
+     * expression, obtained by applying the functor to the corresponding underlying element.
+     * @param args a list of indices specifying the position in the function. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the expression.
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xfunctor_applier_base<D>::operator()(Args... args) const -> const_reference
+    {
+        XTENSOR_TRY(check_index(shape(), args...));
+        XTENSOR_CHECK_DIMENSION(shape(), args...);
+        return m_functor(m_e(args...));
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the expression.
+     * @param args a list of indices specifying the position in the expression. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the expression, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.uncheked(0, 1);
+     * @endcode
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xfunctor_applier_base<D>::unchecked(Args... args) const -> const_reference
+    {
+        return m_functor(m_e.unchecked(args...));
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the expression.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater
+     * than the number of dimensions of the function.
+     */
+    template <class D>
+    template <class IT>
+    inline auto xfunctor_applier_base<D>::element(IT first, IT last) const -> const_reference
+    {
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        return m_functor(m_e.element(first, last));
+    }
+
+    /**
+     * Returns a reference to the underlying expression of the view.
+     */
+    template <class D>
+    inline auto xfunctor_applier_base<D>::expression() noexcept -> xexpression_type&
+    {
+        return m_e;
+    }
+
+    /**
+     * Returns a consttant reference to the underlying expression of the view.
+     */
+    template <class D>
+    inline auto xfunctor_applier_base<D>::expression() const noexcept -> const xexpression_type&
+    {
+        return m_e;
+    }
+
+    //@}
+
+    /**
+     * @name Broadcasting
+     */
+    //@{
+    /**
+     * Broadcast the shape of the function to the specified parameter.
+     * @param shape the result shape
+     * @param reuse_cache boolean for reusing a previously computed shape
+     * @return a boolean indicating whether the broadcasting is trivial
+     */
+    template <class D>
+    template <class S>
+    inline bool xfunctor_applier_base<D>::broadcast_shape(S& shape, bool reuse_cache) const
+    {
+        return m_e.broadcast_shape(shape, reuse_cache);
+    }
+
+    /**
+     * Checks whether the xfunctor_applier_base can be linearly assigned to an expression
+     * with the specified strides.
+     *
+     * @return a boolean indicating whether a linear assign is possible
+     */
+    template <class D>
+    template <class S>
+    inline bool xfunctor_applier_base<D>::has_linear_assign(const S& strides) const
+    {
+        return m_e.has_linear_assign(strides);
+    }
+
+    //@}
+
+    /**
+     * @name Iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::begin() noexcept
+    {
+        return xfunctor_iterator<functor_type, decltype(m_e.template begin<L>())>(
+            m_e.template begin<L>(),
+            &m_functor
+        );
+    }
+
+    /**
+     * Returns an iterator to the element following the last element
+     * of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::end() noexcept
+    {
+        return xfunctor_iterator<functor_type, decltype(m_e.template end<L>())>(m_e.template end<L>(), &m_functor);
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::begin() const noexcept
+    {
+        return this->template cbegin<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::end() const noexcept
+    {
+        return this->template cend<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::cbegin() const noexcept
+    {
+        return xfunctor_iterator<const functor_type, decltype(m_e.template cbegin<L>())>(
+            m_e.template cbegin<L>(),
+            &m_functor
+        );
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::cend() const noexcept
+    {
+        return xfunctor_iterator<const functor_type, decltype(m_e.template cend<L>())>(
+            m_e.template cend<L>(),
+            &m_functor
+        );
+    }
+
+    //@}
+
+    /**
+     * @name Broadcast iterators
+     */
+    //@{
+    /**
+     * Returns a constant iterator to the first element of the expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::begin(const S& shape) noexcept -> broadcast_iterator<S, L>
+    {
+        return broadcast_iterator<S, L>(m_e.template begin<S, L>(shape), &m_functor);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::end(const S& shape) noexcept -> broadcast_iterator<S, L>
+    {
+        return broadcast_iterator<S, L>(m_e.template end<S, L>(shape), &m_functor);
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::begin(const S& shape) const noexcept
+        -> const_broadcast_iterator<S, L>
+    {
+        return cbegin<S, L>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::end(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        return cend<S, L>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::cbegin(const S& shape) const noexcept
+        -> const_broadcast_iterator<S, L>
+    {
+        return const_broadcast_iterator<S, L>(m_e.template cbegin<S, L>(shape), &m_functor);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::cend(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        return const_broadcast_iterator<S, L>(m_e.template cend<S, L>(shape), &m_functor);
+    }
+
+    //@}
+
+    /**
+     * @name Reverse iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::rbegin() noexcept
+    {
+        return xfunctor_iterator<functor_type, decltype(m_e.template rbegin<L>())>(
+            m_e.template rbegin<L>(),
+            &m_functor
+        );
+    }
+
+    /**
+     * Returns an iterator to the element following the last element
+     * of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::rend() noexcept
+    {
+        return xfunctor_iterator<functor_type, decltype(m_e.template rend<L>())>(
+            m_e.template rend<L>(),
+            &m_functor
+        );
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::rbegin() const noexcept
+    {
+        return this->template crbegin<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::rend() const noexcept
+    {
+        return this->template crend<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::crbegin() const noexcept
+    {
+        return xfunctor_iterator<const functor_type, decltype(m_e.template crbegin<L>())>(
+            m_e.template crbegin<L>(),
+            &m_functor
+        );
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xfunctor_applier_base<D>::crend() const noexcept
+    {
+        return xfunctor_iterator<const functor_type, decltype(m_e.template crend<L>())>(
+            m_e.template crend<L>(),
+            &m_functor
+        );
+    }
+
+    //@}
+
+    /**
+     * @name Reverse broadcast iterators
+     */
+
+    /**
+     * Returns an iterator to the first element of the expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::rbegin(const S& shape) noexcept -> reverse_broadcast_iterator<S, L>
+    {
+        return reverse_broadcast_iterator<S, L>(m_e.template rbegin<S, L>(shape), &m_functor);
+    }
+
+    /**
+     * Returns an iterator to the element following the last element of the
+     * reversed expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::rend(const S& shape) noexcept -> reverse_broadcast_iterator<S, L>
+    {
+        return reverse_broadcast_iterator<S, L>(m_e.template rend<S, L>(shape), &m_functor);
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::rbegin(const S& shape) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        return crbegin<S, L>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the reversed expression.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::rend(const S& /*shape*/) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        return crend<S, L>();
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::crbegin(const S& /*shape*/) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        return const_reverse_broadcast_iterator<S, L>(m_e.template crbegin<S, L>(), &m_functor);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the reversed expression.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <class S, layout_type L>
+    inline auto xfunctor_applier_base<D>::crend(const S& shape) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        return const_reverse_broadcast_iterator<S, L>(m_e.template crend<S, L>(shape), &m_functor);
+    }
+
+    //@}
+
+    /********************
+     * linear iterators *
+     ********************/
+
+    // Each linear_* accessor wraps the corresponding linear iterator of the
+    // underlying expression in an xfunctor_iterator bound to m_functor, so
+    // the functor is applied on dereference.
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_begin() noexcept -> linear_iterator
+    {
+        return linear_iterator(m_e.linear_begin(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_end() noexcept -> linear_iterator
+    {
+        return linear_iterator(m_e.linear_end(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_begin() const noexcept -> const_linear_iterator
+    {
+        return const_linear_iterator(m_e.linear_begin(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_end() const noexcept -> const_linear_iterator
+    {
+        return const_linear_iterator(m_e.linear_end(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_cbegin() const noexcept -> const_linear_iterator
+    {
+        return const_linear_iterator(m_e.linear_cbegin(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_cend() const noexcept -> const_linear_iterator
+    {
+        return const_linear_iterator(m_e.linear_cend(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_rbegin() noexcept -> reverse_linear_iterator
+    {
+        return reverse_linear_iterator(m_e.linear_rbegin(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_rend() noexcept -> reverse_linear_iterator
+    {
+        return reverse_linear_iterator(m_e.linear_rend(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_rbegin() const noexcept -> const_reverse_linear_iterator
+    {
+        return const_reverse_linear_iterator(m_e.linear_rbegin(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_rend() const noexcept -> const_reverse_linear_iterator
+    {
+        return const_reverse_linear_iterator(m_e.linear_rend(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_crbegin() const noexcept -> const_reverse_linear_iterator
+    {
+        return const_reverse_linear_iterator(m_e.linear_crbegin(), &m_functor);
+    }
+
+    template <class D>
+    inline auto xfunctor_applier_base<D>::linear_crend() const noexcept -> const_reverse_linear_iterator
+    {
+        return const_reverse_linear_iterator(m_e.linear_crend(), &m_functor);
+    }
+
+    /***************
+     * stepper api *
+     ***************/
+
+    template <class D>
+    template <class S>
+    inline auto xfunctor_applier_base<D>::stepper_begin(const S& shape) noexcept -> stepper
+    {
+        return stepper(m_e.stepper_begin(shape), &m_functor);
+    }
+
+    template <class D>
+    template <class S>
+    inline auto xfunctor_applier_base<D>::stepper_end(const S& shape, layout_type l) noexcept -> stepper
+    {
+        return stepper(m_e.stepper_end(shape, l), &m_functor);
+    }
+
+    template <class D>
+    template <class S>
+    inline auto xfunctor_applier_base<D>::stepper_begin(const S& shape) const noexcept -> const_stepper
+    {
+        const xexpression_type& const_m_e = m_e;
+        return const_stepper(const_m_e.stepper_begin(shape), &m_functor);
+    }
+
+    template <class D>
+    template <class S>
+    inline auto xfunctor_applier_base<D>::stepper_end(const S& shape, layout_type l) const noexcept
+        -> const_stepper
+    {
+        const xexpression_type& const_m_e = m_e;
+        return const_stepper(const_m_e.stepper_end(shape, l), &m_functor);
+    }
+
+    /********************************
+     * xfunctor_view implementation *
+     ********************************/
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended assignment operator.
+     */
+    template <class F, class CT>
+    template <class E>
+    inline auto xfunctor_view<F, CT>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        bool cond = (e.derived_cast().shape().size() == this->dimension())
+                    && std::equal(this->shape().begin(), this->shape().end(), e.derived_cast().shape().begin());
+        if (!cond)
+        {
+            semantic_base::operator=(broadcast(e.derived_cast(), this->shape()));
+        }
+        else
+        {
+            semantic_base::operator=(e);
+        }
+        return *this;
+    }
+
+    //@}
+
+    template <class F, class CT>
+    template <class E>
+    inline auto xfunctor_view<F, CT>::operator=(const E& e) -> disable_xexpression<E, self_type>&
+    {
+        std::fill(this->begin(), this->end(), e);
+        return *this;
+    }
+
+    template <class F, class CT>
+    inline void xfunctor_view<F, CT>::assign_temporary_impl(temporary_type&& tmp)
+    {
+        std::copy(tmp.cbegin(), tmp.cend(), this->begin());
+    }
+
+    template <class F, class CT>
+    template <class E>
+    inline auto xfunctor_view<F, CT>::build_functor_view(E&& e) const -> rebind_t<E>
+    {
+        return rebind_t<E>((this->m_functor), std::forward<E>(e));
+    }
+
+    /***********************************
+     * xfunctor_adaptor implementation *
+     ***********************************/
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended assignment operator.
+     */
+    template <class F, class CT>
+    template <class E>
+    inline auto xfunctor_adaptor<F, CT>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        const auto& de = e.derived_cast();
+        this->m_e.resize(de.shape());
+
+        if (this->layout() == de.layout())
+        {
+            std::copy(de.linear_begin(), de.linear_end(), this->linear_begin());
+        }
+        else
+        {
+            // note: does this even select the current layout of *this* for iteration?
+            std::copy(de.begin(), de.end(), this->begin());
+        }
+
+        return *this;
+    }
+
+    //@}
+
+    template <class F, class CT>
+    template <class S>
+    auto xfunctor_adaptor<F, CT>::resize(S&& shape, bool force)
+    {
+        this->m_e.resize(std::forward<S>(shape), force);
+    }
+
+    template <class F, class CT>
+    template <class S>
+    auto xfunctor_adaptor<F, CT>::resize(S&& shape, layout_type l)
+    {
+        this->m_e.resize(std::forward<S>(shape), l);
+    }
+
+    template <class F, class CT>
+    template <class S>
+    auto xfunctor_adaptor<F, CT>::resize(S&& shape, const strides_type& strides)
+    {
+        this->m_e.resize(std::forward<S>(shape), strides);
+    }
+
+    template <class F, class CT>
+    template <class S>
+    auto& xfunctor_adaptor<F, CT>::reshape(S&& shape, layout_type layout) &
+    {
+        this->m_e.reshape(std::forward<S>(shape), layout);
+        return *this;
+    }
+
+    /************************************
+     * xfunctor_iterator implementation *
+     ************************************/
+
+    template <class F, class IT>
+    xfunctor_iterator<F, IT>::xfunctor_iterator(const IT& it, functor_type* pf)
+        : m_it(it)          // wrapped iterator of the underlying expression
+        , p_functor(pf)     // non-owning pointer to the functor to apply
+    {
+    }
+
+    // Navigation members forward to the wrapped iterator; only dereference
+    // involves the functor.
+
+    template <class F, class IT>
+    inline auto xfunctor_iterator<F, IT>::operator++() -> self_type&
+    {
+        ++m_it;
+        return *this;
+    }
+
+    template <class F, class IT>
+    inline auto xfunctor_iterator<F, IT>::operator--() -> self_type&
+    {
+        --m_it;
+        return *this;
+    }
+
+    template <class F, class IT>
+    inline auto xfunctor_iterator<F, IT>::operator+=(difference_type n) -> self_type&
+    {
+        m_it += n;
+        return *this;
+    }
+
+    template <class F, class IT>
+    inline auto xfunctor_iterator<F, IT>::operator-=(difference_type n) -> self_type&
+    {
+        m_it -= n;
+        return *this;
+    }
+
+    template <class F, class IT>
+    inline auto xfunctor_iterator<F, IT>::operator-(xfunctor_iterator rhs) const -> difference_type
+    {
+        return m_it - rhs.m_it;
+    }
+
+    template <class F, class IT>
+    auto xfunctor_iterator<F, IT>::operator*() const -> reference
+    {
+        // Apply the functor to the underlying element on dereference.
+        return (*p_functor)(*m_it);
+    }
+
+    template <class F, class IT>
+    auto xfunctor_iterator<F, IT>::operator->() const -> pointer
+    {
+        // NOTE(review): takes the address of the value returned by operator*;
+        // this is only safe if the functor returns a (persistent) reference —
+        // confirm for functors returning by value.
+        return &(operator*());
+    }
+
+    template <class F, class IT>
+    auto xfunctor_iterator<F, IT>::equal(const xfunctor_iterator& rhs) const -> bool
+    {
+        // Equality depends on the wrapped iterators only, not on the functor.
+        return m_it == rhs.m_it;
+    }
+
+    template <class F, class IT>
+    auto xfunctor_iterator<F, IT>::less_than(const xfunctor_iterator& rhs) const -> bool
+    {
+        return m_it < rhs.m_it;
+    }
+
+    /**
+     * Returns true when both iterators wrap equal underlying iterators.
+     */
+    template <class F, class IT>
+    bool operator==(const xfunctor_iterator<F, IT>& lhs, const xfunctor_iterator<F, IT>& rhs)
+    {
+        return lhs.equal(rhs);
+    }
+
+    /**
+     * Strict "less than" comparison of two xfunctor_iterator.
+     *
+     * Fix: the previous implementation returned `!lhs.less_than(rhs)` — the
+     * negation of the underlying comparison — which made `operator<` behave
+     * like `>=` and broke iterator ordering (e.g. in algorithms relying on
+     * random-access iterator comparisons). The result is now forwarded
+     * unnegated, consistent with `less_than` comparing the wrapped iterators
+     * with `<`.
+     */
+    template <class F, class IT>
+    bool operator<(const xfunctor_iterator<F, IT>& lhs, const xfunctor_iterator<F, IT>& rhs)
+    {
+        return lhs.less_than(rhs);
+    }
+
+    /***********************************
+     * xfunctor_stepper implementation *
+     ***********************************/
+
+    template <class F, class ST>
+    xfunctor_stepper<F, ST>::xfunctor_stepper(const ST& stepper, functor_type* pf)
+        : m_stepper(stepper)
+        , p_functor(pf)
+    {
+    }
+
+    template <class F, class ST>
+    auto xfunctor_stepper<F, ST>::operator*() const -> reference
+    {
+        return (*p_functor)(*m_stepper);
+    }
+
+    template <class F, class ST>
+    void xfunctor_stepper<F, ST>::step(size_type dim)
+    {
+        m_stepper.step(dim);
+    }
+
+    template <class F, class ST>
+    void xfunctor_stepper<F, ST>::step_back(size_type dim)
+    {
+        m_stepper.step_back(dim);
+    }
+
+    template <class F, class ST>
+    void xfunctor_stepper<F, ST>::step(size_type dim, size_type n)
+    {
+        m_stepper.step(dim, n);
+    }
+
+    template <class F, class ST>
+    void xfunctor_stepper<F, ST>::step_back(size_type dim, size_type n)
+    {
+        m_stepper.step_back(dim, n);
+    }
+
+    template <class F, class ST>
+    void xfunctor_stepper<F, ST>::reset(size_type dim)
+    {
+        m_stepper.reset(dim);
+    }
+
+    template <class F, class ST>
+    void xfunctor_stepper<F, ST>::reset_back(size_type dim)
+    {
+        m_stepper.reset_back(dim);
+    }
+
+    template <class F, class ST>
+    void xfunctor_stepper<F, ST>::to_begin()
+    {
+        m_stepper.to_begin();
+    }
+
+    template <class F, class ST>
+    void xfunctor_stepper<F, ST>::to_end(layout_type l)
+    {
+        m_stepper.to_end(l);
+    }
+}
+#endif

+ 528 - 0
3rd/numpy/include/xtensor/xgenerator.hpp

@@ -0,0 +1,528 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_GENERATOR_HPP
+#define XTENSOR_GENERATOR_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <numeric>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xsequence.hpp>
+
+#include "xaccessible.hpp"
+#include "xexpression.hpp"
+#include "xiterable.hpp"
+#include "xstrided_view.hpp"
+#include "xstrides.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+
+    /************************
+     * xgenerator extension *
+     ************************/
+
+    namespace extension
+    {
+        // Selects the extension base class of xgenerator from the expression
+        // tag of the return type R; the plain tensor tag maps to an empty base.
+        template <class Tag, class F, class R, class S>
+        struct xgenerator_base_impl;
+
+        template <class F, class R, class S>
+        struct xgenerator_base_impl<xtensor_expression_tag, F, R, S>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class F, class R, class S>
+        struct xgenerator_base : xgenerator_base_impl<xexpression_tag_t<R>, F, R, S>
+        {
+        };
+
+        // Convenience alias for the selected extension base.
+        template <class F, class R, class S>
+        using xgenerator_base_t = typename xgenerator_base<F, R, S>::type;
+    }
+
+    /**************
+     * xgenerator *
+     **************/
+
+    template <class F, class R, class S>
+    class xgenerator;
+
+    // Iterable inner types: a generator has no underlying buffer or strides,
+    // so iteration is index-based and read-only (stepper == const_stepper).
+    template <class C, class R, class S>
+    struct xiterable_inner_types<xgenerator<C, R, S>>
+    {
+        using inner_shape_type = S;
+        using const_stepper = xindexed_stepper<xgenerator<C, R, S>, true>;
+        using stepper = const_stepper;
+    };
+
+    // Container inner types: elements are computed on the fly, so both
+    // reference aliases are the plain value type R rather than true references.
+    template <class C, class R, class S>
+    struct xcontainer_inner_types<xgenerator<C, R, S>>
+    {
+        using reference = R;
+        using const_reference = R;
+        using size_type = std::size_t;
+    };
+
+    /*************************************
+     * overlapping_memory_checker_traits *
+     *************************************/
+
+    // A generator exposes no memory address, so it can never alias a
+    // destination buffer: overlap checks trivially report false.
+    template <class E>
+    struct overlapping_memory_checker_traits<
+        E,
+        std::enable_if_t<!has_memory_address<E>::value && is_specialization_of<xgenerator, E>::value>>
+    {
+        static bool check_overlap(const E&, const memory_range&)
+        {
+            return false;
+        }
+    };
+
+    /**
+     * @class xgenerator
+     * @brief Multidimensional function operating on indices.
+     *
+     * The xgenerator class implements a multidimensional function,
+     * generating a value from the supplied indices.
+     *
+     * @tparam F the function type
+     * @tparam R the return type of the function
+     * @tparam S the shape type of the generator
+     */
+    template <class F, class R, class S>
+    class xgenerator : public xsharable_expression<xgenerator<F, R, S>>,
+                       public xconst_iterable<xgenerator<F, R, S>>,
+                       public xconst_accessible<xgenerator<F, R, S>>,
+                       public extension::xgenerator_base_t<F, R, S>
+    {
+    public:
+
+        using self_type = xgenerator<F, R, S>;
+        using functor_type = typename std::remove_reference<F>::type;
+
+        using accessible_base = xconst_accessible<self_type>;
+        using extension_base = extension::xgenerator_base_t<F, R, S>;
+        using expression_tag = typename extension_base::expression_tag;
+
+        using inner_types = xcontainer_inner_types<self_type>;
+        using value_type = R;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+        using pointer = value_type*;
+        using const_pointer = const value_type*;
+        using size_type = typename inner_types::size_type;
+        using difference_type = std::ptrdiff_t;
+
+        using iterable_base = xconst_iterable<self_type>;
+        using inner_shape_type = typename iterable_base::inner_shape_type;
+        using shape_type = inner_shape_type;
+
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+
+        using bool_load_type = xt::bool_load_type<R>;
+
+        // Values are computed per-index, so there is no storage layout and
+        // the expression is never contiguous.
+        static constexpr layout_type static_layout = layout_type::dynamic;
+        static constexpr bool contiguous_layout = false;
+
+        template <class Func>
+        xgenerator(Func&& f, const S& shape) noexcept;
+
+        const inner_shape_type& shape() const noexcept;
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+        using accessible_base::shape;
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        template <class O>
+        bool broadcast_shape(O& shape, bool reuse_cache = false) const;
+
+        template <class O>
+        bool has_linear_assign(const O& /*strides*/) const noexcept;
+
+        template <class O>
+        const_stepper stepper_begin(const O& shape) const noexcept;
+        template <class O>
+        const_stepper stepper_end(const O& shape, layout_type) const noexcept;
+
+        // Only participates in overload resolution when the functor provides
+        // its own assign_to (see has_assign_to).
+        template <class E, class FE = F, class = std::enable_if_t<has_assign_to<E, FE>::value>>
+        void assign_to(xexpression<E>& e) const noexcept;
+
+        const functor_type& functor() const noexcept;
+
+        // Rebinds the generator to another functor/return type pair, keeping
+        // the same shape type.
+        template <class OR, class OF>
+        using rebind_t = xgenerator<OF, OR, S>;
+
+        template <class OR, class OF>
+        rebind_t<OR, OF> build_generator(OF&& func) const;
+
+        template <class O = xt::dynamic_shape<typename shape_type::value_type>>
+        auto reshape(O&& shape) const&;
+
+        template <class O = xt::dynamic_shape<typename shape_type::value_type>>
+        auto reshape(O&& shape) &&;
+
+        template <class T>
+        auto reshape(std::initializer_list<T> shape) const&;
+
+        template <class T>
+        auto reshape(std::initializer_list<T> shape) &&;
+
+    private:
+
+        // Signed shapes may contain a single -1 entry that is inferred from
+        // the generator size; the unsigned overload forwards as-is.
+        template <class O>
+        decltype(auto) compute_shape(O&& shape, std::false_type /*signed*/) const;
+
+        template <class O>
+        auto compute_shape(O&& shape, std::true_type /*signed*/) const;
+
+        template <class T>
+        auto compute_shape(std::initializer_list<T> shape) const;
+
+        template <std::size_t dim>
+        void adapt_index() const;
+
+        template <std::size_t dim, class I, class... Args>
+        void adapt_index(I& arg, Args&... args) const;
+
+        // The generating functor, invoked with the (adapted) indices.
+        functor_type m_f;
+        // The shape the generator exposes.
+        inner_shape_type m_shape;
+    };
+
+    /*****************************
+     * xgenerator implementation *
+     *****************************/
+
+    /**
+     * @name Constructor
+     */
+    //@{
+    /**
+     * Constructs an xgenerator applying the specified function over the
+     * given shape.
+     * @param f the function to apply
+     * @param shape the shape of the xgenerator
+     */
+    template <class F, class R, class S>
+    template <class Func>
+    inline xgenerator<F, R, S>::xgenerator(Func&& f, const S& shape) noexcept
+        : m_f(std::forward<Func>(f))
+        , m_shape(shape)
+    {
+    }
+
+    //@}
+
+    /**
+     * @name Size and shape
+     */
+    //@{
+    /**
+     * Returns the shape of the xgenerator.
+     */
+    template <class F, class R, class S>
+    inline auto xgenerator<F, R, S>::shape() const noexcept -> const inner_shape_type&
+    {
+        return m_shape;
+    }
+
+    // Always dynamic: there is no underlying storage order to report.
+    template <class F, class R, class S>
+    inline layout_type xgenerator<F, R, S>::layout() const noexcept
+    {
+        return static_layout;
+    }
+
+    // Never contiguous, since elements are computed rather than stored.
+    template <class F, class R, class S>
+    inline bool xgenerator<F, R, S>::is_contiguous() const noexcept
+    {
+        return false;
+    }
+
+    //@}
+
+    /**
+     * @name Data
+     */
+
+    /**
+     * Returns the evaluated element at the specified position in the function.
+     * @param args a list of indices specifying the position in the function. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the function.
+     */
+    template <class F, class R, class S>
+    template <class... Args>
+    inline auto xgenerator<F, R, S>::operator()(Args... args) const -> const_reference
+    {
+        XTENSOR_TRY(check_index(shape(), args...));
+        // Zero out indices on broadcast dimensions of extent 1 and drop
+        // extra leading indices before invoking the functor.
+        adapt_index<0>(args...);
+        return m_f(args...);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the expression.
+     * @param args a list of indices specifying the position in the expression. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the expression, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     *  code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class F, class R, class S>
+    template <class... Args>
+    inline auto xgenerator<F, R, S>::unchecked(Args... args) const -> const_reference
+    {
+        // No bounds check and no index adaptation: indices are forwarded
+        // to the functor verbatim.
+        return m_f(args...);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the function.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater
+     * than the number of dimensions of the container.
+     */
+    template <class F, class R, class S>
+    template <class It>
+    inline auto xgenerator<F, R, S>::element(It first, It last) const -> const_reference
+    {
+        // bounded_iterator clamps each index against the corresponding shape
+        // entry while the functor consumes the sequence.
+        using bounded_iterator = xbounded_iterator<It, typename shape_type::const_iterator>;
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        return m_f.element(bounded_iterator(first, shape().cbegin()), bounded_iterator(last, shape().cend()));
+    }
+
+    //@}
+
+    /**
+     * @name Broadcasting
+     */
+    //@{
+    /**
+     * Broadcast the shape of the function to the specified parameter.
+     * @param shape the result shape
+     * @param reuse_cache parameter for internal optimization
+     * @return a boolean indicating whether the broadcasting is trivial
+     */
+    template <class F, class R, class S>
+    template <class O>
+    inline bool xgenerator<F, R, S>::broadcast_shape(O& shape, bool) const
+    {
+        return xt::broadcast_shape(m_shape, shape);
+    }
+
+    /**
+     * Checks whether the xgenerator can be linearly assigned to an expression
+     * with the specified strides.
+     * @return a boolean indicating whether a linear assign is possible
+     */
+    template <class F, class R, class S>
+    template <class O>
+    inline bool xgenerator<F, R, S>::has_linear_assign(const O& /*strides*/) const noexcept
+    {
+        // A generator has no strides of its own, so linear assignment is
+        // never possible.
+        return false;
+    }
+
+    //@}
+
+    // The offset aligns this expression's dimensions with the (possibly
+    // larger) broadcast shape of the assignment target.
+    template <class F, class R, class S>
+    template <class O>
+    inline auto xgenerator<F, R, S>::stepper_begin(const O& shape) const noexcept -> const_stepper
+    {
+        size_type offset = shape.size() - this->dimension();
+        return const_stepper(this, offset);
+    }
+
+    template <class F, class R, class S>
+    template <class O>
+    inline auto xgenerator<F, R, S>::stepper_end(const O& shape, layout_type) const noexcept -> const_stepper
+    {
+        size_type offset = shape.size() - this->dimension();
+        return const_stepper(this, offset, true);
+    }
+
+    // Resizes the destination to this generator's shape, then delegates the
+    // actual assignment to the functor (only available when the functor
+    // implements assign_to, see the enable_if on the declaration).
+    template <class F, class R, class S>
+    template <class E, class, class>
+    inline void xgenerator<F, R, S>::assign_to(xexpression<E>& e) const noexcept
+    {
+        e.derived_cast().resize(m_shape);
+        m_f.assign_to(e);
+    }
+
+    // Read-only access to the wrapped functor.
+    template <class F, class R, class S>
+    inline auto xgenerator<F, R, S>::functor() const noexcept -> const functor_type&
+    {
+        return m_f;
+    }
+
+    // Builds a new generator over the same shape with a different functor
+    // and return type.
+    template <class F, class R, class S>
+    template <class OR, class OF>
+    inline auto xgenerator<F, R, S>::build_generator(OF&& func) const -> rebind_t<OR, OF>
+    {
+        return rebind_t<OR, OF>(std::move(func), shape_type(m_shape));
+    }
+
+    /**
+     * Reshapes the generator and keeps old elements. The `shape` argument can have one of its value
+     * equal to `-1`, in this case the value is inferred from the number of elements in the generator
+     * and the remaining values in the `shape`.
+     * @code{.cpp}
+     * auto a = xt::arange<double>(50).reshape({-1, 10});
+     * //a.shape() is {5, 10}
+     * @endcode
+     * @param shape the new shape (has to have same number of elements as the original generator)
+     */
+    template <class F, class R, class S>
+    template <class O>
+    inline auto xgenerator<F, R, S>::reshape(O&& shape) const&
+    {
+        // Dispatch on the signedness of the shape's value type: signed shapes
+        // may contain a single -1 entry to be inferred.
+        return reshape_view(*this, compute_shape(shape, xtl::is_signed<typename std::decay_t<O>::value_type>()));
+    }
+
+    // Rvalue overload: moves the generator into the reshape view.
+    template <class F, class R, class S>
+    template <class O>
+    inline auto xgenerator<F, R, S>::reshape(O&& shape) &&
+    {
+        return reshape_view(
+            std::move(*this),
+            compute_shape(shape, xtl::is_signed<typename std::decay_t<O>::value_type>())
+        );
+    }
+
+    template <class F, class R, class S>
+    template <class T>
+    inline auto xgenerator<F, R, S>::reshape(std::initializer_list<T> shape) const&
+    {
+        return reshape_view(*this, compute_shape(shape));
+    }
+
+    template <class F, class R, class S>
+    template <class T>
+    inline auto xgenerator<F, R, S>::reshape(std::initializer_list<T> shape) &&
+    {
+        return reshape_view(std::move(*this), compute_shape(shape));
+    }
+
+    // Unsigned shapes cannot contain -1; just convert to a dynamic shape.
+    template <class F, class R, class S>
+    template <class O>
+    inline decltype(auto) xgenerator<F, R, S>::compute_shape(O&& shape, std::false_type) const
+    {
+        return xtl::forward_sequence<xt::dynamic_shape<typename shape_type::value_type>, O>(shape);
+    }
+
+    // Signed shapes: copy the non-negative entries and infer the (single)
+    // -1 entry from the generator's total size.
+    template <class F, class R, class S>
+    template <class O>
+    inline auto xgenerator<F, R, S>::compute_shape(O&& shape, std::true_type) const
+    {
+        using vtype = typename shape_type::value_type;
+        xt::dynamic_shape<vtype> sh(shape.size());
+        using int_type = typename std::decay_t<O>::value_type;
+        int_type accumulator(1);
+        // NOTE(review): neg_idx == 0 doubles as "no -1 seen yet", so a -1 in
+        // the first position followed by a second -1 would escape this debug
+        // assert — confirm against upstream intent.
+        std::size_t neg_idx = 0;
+        // i and j advance in lockstep; i records the position of the -1 entry.
+        std::size_t i = 0;
+        for (std::size_t j = 0; j != shape.size(); ++j, ++i)
+        {
+            auto dim = shape[j];
+            if (dim < 0)
+            {
+                XTENSOR_ASSERT(dim == -1 && !neg_idx);
+                neg_idx = i;
+            }
+            else
+            {
+                sh[j] = static_cast<vtype>(dim);
+            }
+            accumulator *= dim;
+        }
+        // accumulator < 0 iff exactly one -1 was present (odd count of
+        // negatives): infer that entry so the product matches size().
+        if (accumulator < 0)
+        {
+            sh[neg_idx] = this->size()
+                          / static_cast<size_type>(std::make_unsigned_t<int_type>(std::abs(accumulator)));
+        }
+        return sh;
+    }
+
+    // Initializer-list entry point: materialize the list into a dynamic
+    // shape, then dispatch on signedness as above.
+    template <class F, class R, class S>
+    template <class T>
+    inline auto xgenerator<F, R, S>::compute_shape(std::initializer_list<T> shape) const
+    {
+        using sh_type = xt::dynamic_shape<T>;
+        sh_type sh = xtl::make_sequence<sh_type>(shape.size());
+        std::copy(shape.begin(), shape.end(), sh.begin());
+        return compute_shape(std::move(sh), xtl::is_signed<T>());
+    }
+
+    // Recursion terminator: no indices left to adapt.
+    template <class F, class R, class S>
+    template <std::size_t dim>
+    inline void xgenerator<F, R, S>::adapt_index() const
+    {
+    }
+
+    // Adapts one index at a time: extra leading indices (more indices than
+    // dimensions) are skipped without advancing dim; an out-of-range index on
+    // a dimension of extent 1 is clamped to 0 (broadcasting semantics).
+    template <class F, class R, class S>
+    template <std::size_t dim, class I, class... Args>
+    inline void xgenerator<F, R, S>::adapt_index(I& arg, Args&... args) const
+    {
+        using tmp_value_type = typename decltype(m_shape)::value_type;
+        if (sizeof...(Args) + 1 > m_shape.size())
+        {
+            adapt_index<dim>(args...);
+        }
+        else
+        {
+            if (static_cast<tmp_value_type>(arg) >= m_shape[dim] && m_shape[dim] == 1)
+            {
+                arg = 0;
+            }
+            adapt_index<dim + 1>(args...);
+        }
+    }
+
+    namespace detail
+    {
+        // Factory overload for C-array shapes: converts the array into a
+        // std::array of the same length.
+        template <class Functor, class I, std::size_t L>
+        inline auto make_xgenerator(Functor&& f, const I (&shape)[L]) noexcept
+        {
+            using shape_type = std::array<std::size_t, L>;
+            using type = xgenerator<Functor, typename Functor::value_type, shape_type>;
+            return type(std::forward<Functor>(f), xtl::forward_sequence<shape_type, decltype(shape)>(shape));
+        }
+
+        // Generic factory: forwards any sequence-like shape unchanged.
+        template <class Functor, class S>
+        inline auto make_xgenerator(Functor&& f, S&& shape) noexcept
+        {
+            using type = xgenerator<Functor, typename Functor::value_type, std::decay_t<S>>;
+            return type(std::forward<Functor>(f), std::forward<S>(shape));
+        }
+    }
+}
+
+#endif

+ 614 - 0
3rd/numpy/include/xtensor/xhistogram.hpp

@@ -0,0 +1,614 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+/**
+ * @brief construct histogram
+ */
+
+#ifndef XTENSOR_HISTOGRAM_HPP
+#define XTENSOR_HISTOGRAM_HPP
+
+#include "xset_operation.hpp"
+#include "xsort.hpp"
+#include "xtensor.hpp"
+#include "xview.hpp"
+
+using namespace xt::placeholders;
+
+namespace xt
+{
+    /**
+     * @ingroup digitize
+     * @brief Return the indices of the bins to which each value in input array belongs.
+     *
+     * @param data The data.
+     * @param bin_edges The bin-edges. It has to be 1-dimensional and monotonic.
+     * @param right Indicating whether the intervals include the right or the left bin edge.
+     * @return Output array of indices, of same shape as x.
+     */
+    template <class E1, class E2>
+    inline auto digitize(E1&& data, E2&& bin_edges, bool right = false)
+    {
+        // Preconditions (debug-only): sorted 1-d edges with at least two
+        // entries, and all data within [first edge, last edge].
+        XTENSOR_ASSERT(bin_edges.dimension() == 1);
+        XTENSOR_ASSERT(bin_edges.size() >= 2);
+        XTENSOR_ASSERT(std::is_sorted(bin_edges.cbegin(), bin_edges.cend()));
+        XTENSOR_ASSERT(xt::amin(data)[0] >= bin_edges[0]);
+        XTENSOR_ASSERT(xt::amax(data)[0] <= bin_edges[bin_edges.size() - 1]);
+
+        // Binning reduces to a binary search of each value in the edges.
+        return xt::searchsorted(std::forward<E2>(bin_edges), std::forward<E1>(data), right);
+    }
+
+    namespace detail
+    {
+        // Core histogram kernel shared by all public overloads.
+        // @param data     1-d data to bin
+        // @param bin_edges sorted 1-d edges (size >= 2)
+        // @param weights  per-sample weights, same size as data
+        // @param density  if true, normalize so the integral over bins is 1
+        // @param equal_bins if true, bins are assumed equally spaced and the
+        //        bin index is computed by a linear transform (O(n)); otherwise
+        //        a sort-and-scan over the edges is used.
+        template <class R = double, class E1, class E2, class E3>
+        inline auto histogram_imp(E1&& data, E2&& bin_edges, E3&& weights, bool density, bool equal_bins)
+        {
+            using size_type = common_size_type_t<std::decay_t<E1>, std::decay_t<E2>, std::decay_t<E3>>;
+            using value_type = typename std::decay_t<E3>::value_type;
+
+            XTENSOR_ASSERT(data.dimension() == 1);
+            XTENSOR_ASSERT(weights.dimension() == 1);
+            XTENSOR_ASSERT(bin_edges.dimension() == 1);
+            XTENSOR_ASSERT(weights.size() == data.size());
+            XTENSOR_ASSERT(bin_edges.size() >= 2);
+            XTENSOR_ASSERT(std::is_sorted(bin_edges.cbegin(), bin_edges.cend()));
+
+            size_t n_bins = bin_edges.size() - 1;
+            xt::xtensor<value_type, 1> count = xt::zeros<value_type>({n_bins});
+
+            if (equal_bins)
+            {
+                // Fast path: bin index = floor(n_bins * (v - left) / (right - left)).
+                std::array<typename std::decay_t<E2>::value_type, 2> bounds = xt::minmax(bin_edges)();
+                auto left = static_cast<double>(bounds[0]);
+                auto right = static_cast<double>(bounds[1]);
+                double norm = 1. / (right - left);
+                for (size_t i = 0; i < data.size(); ++i)
+                {
+                    auto v = static_cast<double>(data(i));
+                    // left and right are not bounds of data
+                    if (v >= left && v < right)
+                    {
+                        auto i_bin = static_cast<size_t>(static_cast<double>(n_bins) * (v - left) * norm);
+                        count(i_bin) += weights(i);
+                    }
+                    else if (v == right)
+                    {
+                        // The right-most edge is inclusive, matching the
+                        // last bin's closed upper bound.
+                        count(n_bins - 1) += weights(i);
+                    }
+                }
+            }
+            else
+            {
+                // General path: visit the data in sorted order so the bin
+                // cursor only ever moves forward over the edges.
+                auto sorter = xt::argsort(data);
+
+                size_type ibin = 0;
+
+                for (auto& idx : sorter)
+                {
+                    auto item = data[idx];
+
+                    if (item < bin_edges[0])
+                    {
+                        continue;
+                    }
+
+                    if (item > bin_edges[n_bins])
+                    {
+                        // Data is sorted, so everything after this is also
+                        // out of range.
+                        break;
+                    }
+
+                    while (item >= bin_edges[ibin + 1] && ibin < n_bins - 1)
+                    {
+                        ++ibin;
+                    }
+
+                    count[ibin] += weights[idx];
+                }
+            }
+
+            xt::xtensor<R, 1> prob = xt::cast<R>(count);
+
+            if (density)
+            {
+                // Normalize by bin width and sample count so the histogram
+                // integrates to one.
+                R n = static_cast<R>(data.size());
+                for (size_type i = 0; i < bin_edges.size() - 1; ++i)
+                {
+                    prob[i] /= (static_cast<R>(bin_edges[i + 1] - bin_edges[i]) * n);
+                }
+            }
+
+            return prob;
+        }
+
+    }  // detail
+
+    /**
+     * @ingroup histogram
+     * @brief Compute the histogram of a set of data.
+     *
+     * @param data The data.
+     * @param bin_edges The bin-edges. It has to be 1-dimensional and monotonic.
+     * @param weights Weight factors corresponding to each data-point.
+     * @param density If true the resulting integral is normalized to 1. [default: false]
+     * @return An one-dimensional xarray<double>, length: bin_edges.size()-1.
+     */
+    template <class R = double, class E1, class E2, class E3>
+    inline auto histogram(E1&& data, E2&& bin_edges, E3&& weights, bool density = false)
+    {
+        // Caller-supplied edges may be arbitrary, so the equal-width fast
+        // path is disabled (equal_bins = false).
+        return detail::histogram_imp<R>(
+            std::forward<E1>(data),
+            std::forward<E2>(bin_edges),
+            std::forward<E3>(weights),
+            density,
+            false
+        );
+    }
+
+    /**
+     * @ingroup histogram
+     * @brief Compute the histogram of a set of data.
+     *
+     * @param data The data.
+     * @param bin_edges The bin-edges.
+     * @param density If true the resulting integral is normalized to 1. [default: false]
+     * @return An one-dimensional xarray<double>, length: bin_edges.size()-1.
+     */
+    template <class R = double, class E1, class E2>
+    inline auto histogram(E1&& data, E2&& bin_edges, bool density = false)
+    {
+        using value_type = typename std::decay_t<E1>::value_type;
+
+        auto n = data.size();
+
+        // Unweighted variant: every sample counts once (weights of ones).
+        return detail::histogram_imp<R>(
+            std::forward<E1>(data),
+            std::forward<E2>(bin_edges),
+            xt::ones<value_type>({n}),
+            density,
+            false
+        );
+    }
+
+    /**
+     * @ingroup histogram
+     * @brief Compute the histogram of a set of data.
+     *
+     * @param data The data.
+     * @param bins The number of bins. [default: 10]
+     * @param density If true the resulting integral is normalized to 1. [default: false]
+     * @return An one-dimensional xarray<double>, length: bin_edges.size()-1.
+     */
+    template <class R = double, class E1>
+    inline auto histogram(E1&& data, std::size_t bins = 10, bool density = false)
+    {
+        using value_type = typename std::decay_t<E1>::value_type;
+
+        auto n = data.size();
+
+        // Edges are generated internally (default algorithm), so the
+        // equal-width fast path can be used (equal_bins = true).
+        return detail::histogram_imp<R>(
+            std::forward<E1>(data),
+            histogram_bin_edges(data, xt::ones<value_type>({n}), bins),
+            xt::ones<value_type>({n}),
+            density,
+            true
+        );
+    }
+
+    /**
+     * @ingroup histogram
+     * @brief Compute the histogram of a set of data.
+     *
+     * @param data The data.
+     * @param bins The number of bins.
+     * @param left The lower-most edge.
+     * @param right The upper-most edge.
+     * @param density If true the resulting integral is normalized to 1. [default: false]
+     * @return An one-dimensional xarray<double>, length: bin_edges.size()-1.
+     */
+    template <class R = double, class E1, class E2>
+    inline auto histogram(E1&& data, std::size_t bins, E2 left, E2 right, bool density = false)
+    {
+        using value_type = typename std::decay_t<E1>::value_type;
+
+        auto n = data.size();
+
+        return detail::histogram_imp<R>(
+            std::forward<E1>(data),
+            histogram_bin_edges(data, left, right, bins),
+            xt::ones<value_type>({n}),
+            density,
+            true
+        );
+    }
+
+    /**
+     * @ingroup histogram
+     * @brief Compute the histogram of a set of data.
+     *
+     * @param data The data.
+     * @param bins The number of bins.
+     * @param weights Weight factors corresponding to each data-point.
+     * @param density If true the resulting integral is normalized to 1. [default: false]
+     * @return An one-dimensional xarray<double>, length: bin_edges.size()-1.
+     */
+    template <class R = double, class E1, class E2>
+    inline auto histogram(E1&& data, std::size_t bins, E2&& weights, bool density = false)
+    {
+        return detail::histogram_imp<R>(
+            std::forward<E1>(data),
+            histogram_bin_edges(data, weights, bins),
+            std::forward<E2>(weights),
+            density,
+            true
+        );
+    }
+
+    /**
+     * @ingroup histogram
+     * @brief Compute the histogram of a set of data.
+     *
+     * @param data The data.
+     * @param bins The number of bins.
+     * @param left The lower-most edge.
+     * @param right The upper-most edge.
+     * @param weights Weight factors corresponding to each data-point.
+     * @param density If true the resulting integral is normalized to 1. [default: false]
+     * @return An one-dimensional xarray<double>, length: bin_edges.size()-1.
+     */
+    template <class R = double, class E1, class E2, class E3>
+    inline auto histogram(E1&& data, std::size_t bins, E2&& weights, E3 left, E3 right, bool density = false)
+    {
+        return detail::histogram_imp<R>(
+            std::forward<E1>(data),
+            histogram_bin_edges(data, weights, left, right, bins),
+            std::forward<E2>(weights),
+            density,
+            true
+        );
+    }
+
+    /**
+     * @ingroup histogram
+     * @brief Defines different algorithms to be used in "histogram_bin_edges"
+     */
+    enum class histogram_algorithm
+    {
+        automatic,  // currently identical to linspace: bins of equal width
+        linspace,   // bins of equal width
+        logspace,   // bins of logarithmically increasing size
+        uniform     // bins chosen so each holds (roughly) equal total weight
+    };
+
+    /**
+     * @ingroup histogram
+     * @brief Compute the bin-edges of a histogram of a set of data using different algorithms.
+     *
+     * @param data The data.
+     * @param weights Weight factors corresponding to each data-point.
+     * @param left The lower-most edge.
+     * @param right The upper-most edge.
+     * @param bins The number of bins. [default: 10]
+     * @param mode The type of algorithm to use. [default: "auto"]
+     * @return An one-dimensional xarray<double>, length: bins+1.
+     */
+    template <class E1, class E2, class E3>
+    inline auto histogram_bin_edges(
+        E1&& data,
+        E2&& weights,
+        E3 left,
+        E3 right,
+        std::size_t bins = 10,
+        histogram_algorithm mode = histogram_algorithm::automatic
+    )
+    {
+        // counter and return type
+        using size_type = common_size_type_t<std::decay_t<E1>, std::decay_t<E2>>;
+        using value_type = typename std::decay_t<E1>::value_type;
+        using weights_type = typename std::decay_t<E2>::value_type;
+
+        // basic checks
+        // - rank
+        XTENSOR_ASSERT(data.dimension() == 1);
+        XTENSOR_ASSERT(weights.dimension() == 1);
+        // - size
+        XTENSOR_ASSERT(weights.size() == data.size());
+        // - bounds
+        XTENSOR_ASSERT(left <= right);
+        // - non-empty
+        XTENSOR_ASSERT(bins > std::size_t(0));
+
+        // act on different modes
+        switch (mode)
+        {
+            // bins of equal width ("automatic" currently behaves as linspace)
+            case histogram_algorithm::automatic:
+            {
+                xt::xtensor<value_type, 1> bin_edges = xt::linspace<value_type>(left, right, bins + 1);
+                return bin_edges;
+            }
+
+            // bins of equal width
+            case histogram_algorithm::linspace:
+            {
+                xt::xtensor<value_type, 1> bin_edges = xt::linspace<value_type>(left, right, bins + 1);
+                return bin_edges;
+            }
+
+            // bins of logarithmically increasing size
+            case histogram_algorithm::logspace:
+            {
+                // Integral value types are promoted to double for the log
+                // computation, then cast back.
+                using rhs_value_type = std::conditional_t<xtl::is_integral<value_type>::value, double, value_type>;
+
+                xtensor<value_type, 1> bin_edges = xt::cast<value_type>(
+                    xt::logspace<rhs_value_type>(std::log10(left), std::log10(right), bins + 1)
+                );
+
+                // TODO: replace above with below after 'xsimd' fix
+                // xt::xtensor<value_type,1> bin_edges = xt::logspace<value_type>(
+                //     std::log10(left), std::log10(right), bins+1);
+
+                return bin_edges;
+            }
+
+            // same amount of data in each bin
+            case histogram_algorithm::uniform:
+            {
+                // indices that sort "data"
+                auto sorter = xt::argsort(data);
+
+                // histogram: all of equal 'height'
+                // - height
+                weights_type w = xt::sum<weights_type>(weights)[0] / static_cast<weights_type>(bins);
+                // - apply to all bins
+                xt::xtensor<weights_type, 1> count = w * xt::ones<weights_type>({bins});
+
+                // take cumulative sum, to act as easy look-up
+                count = xt::cumsum(count);
+
+                // edges
+                // - allocate
+                std::vector<size_t> shape = {bins + 1};
+                xt::xtensor<value_type, 1> bin_edges = xtensor<value_type, 1>::from_shape(shape);
+                // - first/last edge
+                bin_edges[0] = left;
+                bin_edges[bins] = right;
+                // - cumulative weight
+                weights_type cum_weight = static_cast<weights_type>(0);
+                // - current bin
+                size_type ibin = 0;
+                // - loop to find interior bin-edges: place an edge each time
+                //   the running weight crosses the next cumulative target
+                for (size_type i = 0; i < weights.size(); ++i)
+                {
+                    if (cum_weight >= count[ibin])
+                    {
+                        bin_edges[ibin + 1] = data[sorter[i]];
+                        ++ibin;
+                    }
+                    cum_weight += weights[sorter[i]];
+                }
+                return bin_edges;
+            }
+
+            // bins of equal width (defensive fallback for unknown modes)
+            default:
+            {
+                xt::xtensor<value_type, 1> bin_edges = xt::linspace<value_type>(left, right, bins + 1);
+                return bin_edges;
+            }
+        }
+    }
+
+    /**
+     * @ingroup histogram
+     * @brief Compute the bin-edges of a histogram of a set of data using different algorithms.
+     *
+     * The outermost edges are taken as the minimum and maximum of the data.
+     *
+     * @param data The data.
+     * @param weights Weight factors corresponding to each data-point.
+     * @param bins The number of bins. [default: 10]
+     * @param mode The type of algorithm to use. [default: "auto"]
+     * @return A one-dimensional xtensor<value_type, 1>, length: bins+1.
+     */
+    template <class E1, class E2>
+    inline auto histogram_bin_edges(
+        E1&& data,
+        E2&& weights,
+        std::size_t bins = 10,
+        histogram_algorithm mode = histogram_algorithm::automatic
+    )
+    {
+        using value_type = typename std::decay_t<E1>::value_type;
+        // Single pass over the data yields both limits of the histogram.
+        std::array<value_type, 2> left_right;
+        left_right = xt::minmax(data)();
+
+        // Delegate to the fully-specified overload.
+        return histogram_bin_edges(
+            std::forward<E1>(data),
+            std::forward<E2>(weights),
+            left_right[0],
+            left_right[1],
+            bins,
+            mode
+        );
+    }
+
+    /**
+     * @ingroup histogram
+     * @brief Compute the bin-edges of a histogram of a set of data using different algorithms.
+     *
+     * All data-points are weighted equally; the outermost edges are taken as
+     * the minimum and maximum of the data.
+     *
+     * @param data The data.
+     * @param bins The number of bins. [default: 10]
+     * @param mode The type of algorithm to use. [default: "auto"]
+     * @return A one-dimensional xtensor<value_type, 1>, length: bins+1.
+     */
+    template <class E1>
+    inline auto
+    histogram_bin_edges(E1&& data, std::size_t bins = 10, histogram_algorithm mode = histogram_algorithm::automatic)
+    {
+        using value_type = typename std::decay_t<E1>::value_type;
+
+        auto n = data.size();
+        std::array<value_type, 2> left_right;
+        left_right = xt::minmax(data)();
+
+        // Delegate to the fully-specified overload with unit weights.
+        return histogram_bin_edges(
+            std::forward<E1>(data),
+            xt::ones<value_type>({n}),
+            left_right[0],
+            left_right[1],
+            bins,
+            mode
+        );
+    }
+
+    /**
+     * @ingroup histogram
+     * @brief Compute the bin-edges of a histogram of a set of data using different algorithms.
+     *
+     * All data-points are weighted equally.
+     *
+     * @param data The data.
+     * @param left The lower-most edge.
+     * @param right The upper-most edge.
+     * @param bins The number of bins. [default: 10]
+     * @param mode The type of algorithm to use. [default: "auto"]
+     * @return A one-dimensional xtensor<value_type, 1>, length: bins+1.
+     */
+    template <class E1, class E2>
+    inline auto histogram_bin_edges(
+        E1&& data,
+        E2 left,
+        E2 right,
+        std::size_t bins = 10,
+        histogram_algorithm mode = histogram_algorithm::automatic
+    )
+    {
+        using value_type = typename std::decay_t<E1>::value_type;
+
+        auto n = data.size();
+
+        // Delegate to the fully-specified overload with unit weights.
+        return histogram_bin_edges(std::forward<E1>(data), xt::ones<value_type>({n}), left, right, bins, mode);
+    }
+
+    /**
+     * Count number of occurrences of each value in array of non-negative ints.
+     *
+     * The number of bins (of size 1) is one larger than the largest value in x.
+     * If minlength is specified, there will be at least this number of bins in
+     * the output array (though it will be longer if necessary, depending on the
+     * contents of x). Each bin gives the number of occurrences of its index
+     * value in x. If weights is specified the input array is weighted by it,
+     * i.e. if a value ``n`` is found at position ``i``, ``out[n] += weight[i]``
+     * instead of ``out[n] += 1``.
+     *
+     * @param data the 1D container with integers to count into bins
+     * @param weights a 1D container with the same number of elements as ``data``
+     * @param minlength The minimum number of bins in the output
+     *
+     * @return 1D container with the bincount
+     */
+    template <class E1, class E2, XTL_REQUIRES(is_xexpression<std::decay_t<E2>>)>
+    inline auto bincount(E1&& data, E2&& weights, std::size_t minlength = 0)
+    {
+        using result_value_type = typename std::decay_t<E2>::value_type;
+        using input_value_type = typename std::decay_t<E1>::value_type;
+        using size_type = typename std::decay_t<E1>::size_type;
+
+        static_assert(
+            xtl::is_integral<typename std::decay_t<E1>::value_type>::value,
+            "Bincount data has to be integral type."
+        );
+        XTENSOR_ASSERT(data.dimension() == 1);
+        XTENSOR_ASSERT(weights.dimension() == 1);
+
+        // Single pass yields the minimum (for validation) and the maximum
+        // (to size the output).
+        std::array<input_value_type, 2> left_right;
+        left_right = xt::minmax(data)();
+
+        if (left_right[0] < input_value_type(0))
+        {
+            // Zero is a valid value; only negative entries are rejected.
+            XTENSOR_THROW(std::runtime_error, "Data argument for bincount can only contain non-negative integers!");
+        }
+
+        // max(data) + 1 bins, but never fewer than 'minlength'.
+        xt::xtensor<result_value_type, 1> res = xt::zeros<result_value_type>(
+            {(std::max)(minlength, std::size_t(left_right[1] + 1))}
+        );
+
+        // Accumulate each data-point's weight into the bin it indexes.
+        for (size_type i = 0; i < data.size(); ++i)
+        {
+            res(data(i)) += weights(i);
+        }
+
+        return res;
+    }
+
+    /**
+     * Count number of occurrences of each value in array of non-negative
+     * ints, giving every data-point a weight of one.
+     *
+     * @param data the 1D container with integers to count into bins
+     * @param minlength The minimum number of bins in the output
+     *
+     * @return 1D container with the bincount
+     */
+    template <class E1>
+    inline auto bincount(E1&& data, std::size_t minlength = 0)
+    {
+        using value_type = typename std::decay_t<E1>::value_type;
+        // Delegate to the weighted overload with an implicit weight of one
+        // per data-point.
+        auto unit_weights = xt::ones<value_type>(data.shape());
+        return bincount(std::forward<E1>(data), std::move(unit_weights), minlength);
+    }
+
+    /**
+     * Get the number of items in each bin, given the fraction of items per bin.
+     * The output is such that the total number of items of all bins is exactly "N".
+     *
+     * @param N the number of items to distribute
+     * @param weights fraction of items per bin: a 1D container whose size is the number of bins
+     *
+     * @return 1D container with the number of items per bin
+     */
+    template <class E>
+    inline xt::xtensor<size_t, 1> bin_items(size_t N, E&& weights)
+    {
+        // Trivial case: a single bin (or none) receives all items.
+        if (weights.size() <= std::size_t(1))
+        {
+            xt::xtensor<size_t, 1> n = N * xt::ones<size_t>({1});
+            return n;
+        }
+
+#ifdef XTENSOR_ENABLE_ASSERT
+        using value_type = typename std::decay_t<E>::value_type;
+
+        // Weights must be non-negative and not all zero.
+        XTENSOR_ASSERT(xt::all(weights >= static_cast<value_type>(0)));
+        XTENSOR_ASSERT(xt::sum(weights)() > static_cast<value_type>(0));
+#endif
+
+        // Normalize the weights to fractions and round the ideal counts up.
+        xt::xtensor<double, 1> P = xt::cast<double>(weights) / static_cast<double>(xt::sum(weights)());
+        xt::xtensor<size_t, 1> n = xt::ceil(static_cast<double>(N) * P);
+
+        // Rounding up may already hit the target exactly.
+        if (xt::sum(n)() == N)
+        {
+            return n;
+        }
+
+        // Otherwise ceil() overshot: remove one item from each of the
+        // (sum(n) - N) bins with the largest fractions (descending argsort).
+        xt::xtensor<size_t, 1> d = xt::zeros<size_t>(P.shape());
+        xt::xtensor<size_t, 1> sorter = xt::argsort(P);
+        sorter = xt::view(sorter, xt::range(P.size(), _, -1));
+        sorter = xt::view(sorter, xt::range(0, xt::sum(n)(0) - N));
+        xt::view(d, xt::keep(sorter)) = 1;
+        n -= d;
+
+        return n;
+    }
+
+    /**
+     * Get the number of items in each bin, with each bin having approximately the same number of
+     * items in it, under the constraint that the total number of items of all bins is exactly "N".
+     *
+     * @param N the number of items to distribute
+     * @param bins the number of bins
+     *
+     * @return 1D container with the number of items per bin
+     */
+    inline xt::xtensor<size_t, 1> bin_items(size_t N, size_t bins)
+    {
+        // Equal fractions per bin: delegate to the weighted overload.
+        xt::xtensor<double, 1> equal_weights = xt::ones<double>({bins});
+        return bin_items(N, std::move(equal_weights));
+    }
+}
+
+#endif

+ 852 - 0
3rd/numpy/include/xtensor/xindex_view.hpp

@@ -0,0 +1,852 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_INDEX_VIEW_HPP
+#define XTENSOR_INDEX_VIEW_HPP
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include "xexpression.hpp"
+#include "xiterable.hpp"
+#include "xoperation.hpp"
+#include "xsemantic.hpp"
+#include "xstrides.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+
+    /*************************
+     * xindex_view extension *
+     *************************/
+
+    namespace extension
+    {
+        // Maps an expression tag to the extension base class of xindex_view.
+        // Only the default tensor tag is handled here; other tags (e.g. the
+        // optional-expression tag) specialize xindex_view_base_impl elsewhere.
+        template <class Tag, class CT, class I>
+        struct xindex_view_base_impl;
+
+        template <class CT, class I>
+        struct xindex_view_base_impl<xtensor_expression_tag, CT, I>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class CT, class I>
+        struct xindex_view_base : xindex_view_base_impl<xexpression_tag_t<CT>, CT, I>
+        {
+        };
+
+        template <class CT, class I>
+        using xindex_view_base_t = typename xindex_view_base<CT, I>::type;
+    }
+
+    /***************
+     * xindex_view *
+     ***************/
+
+    template <class CT, class I>
+    class xindex_view;
+
+    // Container semantics: assignments that require a temporary materialize
+    // into an xarray with the underlying expression's static layout.
+    template <class CT, class I>
+    struct xcontainer_inner_types<xindex_view<CT, I>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using temporary_type = xarray<typename xexpression_type::value_type, xexpression_type::static_layout>;
+    };
+
+    // Iteration types: the view is always one-dimensional, hence the fixed
+    // rank-1 shape; traversal is done through indexed steppers.
+    template <class CT, class I>
+    struct xiterable_inner_types<xindex_view<CT, I>>
+    {
+        using inner_shape_type = std::array<std::size_t, 1>;
+        using const_stepper = xindexed_stepper<xindex_view<CT, I>, true>;
+        using stepper = xindexed_stepper<xindex_view<CT, I>, false>;
+    };
+
+    /**
+     * @class xindex_view
+     * @brief View of an xexpression from vector of indices.
+     *
+     * The xindex_view class implements a flat (1D) view into a multidimensional
+     * xexpression yielding the values at the indices of the index array.
+     * xindex_view is not meant to be used directly, but only with the \ref index_view
+     * and \ref filter helper functions.
+     *
+     * @tparam CT the closure type of the \ref xexpression type underlying this view
+     * @tparam I the index array type of the view
+     *
+     * @sa index_view, filter
+     */
+    template <class CT, class I>
+    class xindex_view : public xview_semantic<xindex_view<CT, I>>,
+                        public xiterable<xindex_view<CT, I>>,
+                        public extension::xindex_view_base_t<CT, I>
+    {
+    public:
+
+        using self_type = xindex_view<CT, I>;
+        using xexpression_type = std::decay_t<CT>;
+        using semantic_base = xview_semantic<self_type>;
+
+        using extension_base = extension::xindex_view_base_t<CT, I>;
+        using expression_tag = typename extension_base::expression_tag;
+
+        using value_type = typename xexpression_type::value_type;
+        using reference = inner_reference_t<CT>;
+        using const_reference = typename xexpression_type::const_reference;
+        using pointer = typename xexpression_type::pointer;
+        using const_pointer = typename xexpression_type::const_pointer;
+        using size_type = typename xexpression_type::size_type;
+        using difference_type = typename xexpression_type::difference_type;
+
+        using iterable_base = xiterable<self_type>;
+        using inner_shape_type = typename iterable_base::inner_shape_type;
+        using shape_type = inner_shape_type;
+
+        using indices_type = I;
+
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+
+        using temporary_type = typename xcontainer_inner_types<self_type>::temporary_type;
+        using base_index_type = xindex_type_t<shape_type>;
+
+        using bool_load_type = typename xexpression_type::bool_load_type;
+
+        // Element order depends entirely on the index array, so the layout
+        // is dynamic and never contiguous.
+        static constexpr layout_type static_layout = layout_type::dynamic;
+        static constexpr bool contiguous_layout = false;
+
+        template <class CTA, class I2>
+        xindex_view(CTA&& e, I2&& indices) noexcept;
+
+        template <class E>
+        self_type& operator=(const xexpression<E>& e);
+
+        template <class E>
+        disable_xexpression<E, self_type>& operator=(const E& e);
+
+        size_type size() const noexcept;
+        size_type dimension() const noexcept;
+        const inner_shape_type& shape() const noexcept;
+        size_type shape(size_type index) const;
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+
+        template <class T>
+        void fill(const T& value);
+
+        reference operator()(size_type idx = size_type(0));
+        template <class... Args>
+        reference operator()(size_type idx0, size_type idx1, Args... args);
+        reference unchecked(size_type idx);
+        template <class S>
+        disable_integral_t<S, reference> operator[](const S& index);
+        template <class OI>
+        reference operator[](std::initializer_list<OI> index);
+        reference operator[](size_type i);
+
+        template <class It>
+        reference element(It first, It last);
+
+        const_reference operator()(size_type idx = size_type(0)) const;
+        template <class... Args>
+        const_reference operator()(size_type idx0, size_type idx1, Args... args) const;
+        const_reference unchecked(size_type idx) const;
+        template <class S>
+        disable_integral_t<S, const_reference> operator[](const S& index) const;
+        template <class OI>
+        const_reference operator[](std::initializer_list<OI> index) const;
+        const_reference operator[](size_type i) const;
+
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        xexpression_type& expression() noexcept;
+        const xexpression_type& expression() const noexcept;
+
+        template <class O>
+        bool broadcast_shape(O& shape, bool reuse_cache = false) const;
+
+        template <class O>
+        bool has_linear_assign(const O& /*strides*/) const noexcept;
+
+        template <class ST>
+        stepper stepper_begin(const ST& shape);
+        template <class ST>
+        stepper stepper_end(const ST& shape, layout_type);
+
+        template <class ST>
+        const_stepper stepper_begin(const ST& shape) const;
+        template <class ST>
+        const_stepper stepper_end(const ST& shape, layout_type) const;
+
+        template <class E>
+        using rebind_t = xindex_view<E, I>;
+
+        template <class E>
+        rebind_t<E> build_index_view(E&& e) const;
+
+    private:
+
+        CT m_e;                          // closure on the underlying expression
+        const indices_type m_indices;    // flat indices selected from m_e
+        const inner_shape_type m_shape;  // 1-D shape: { m_indices.size() }
+
+        void assign_temporary_impl(temporary_type&& tmp);
+
+        friend class xview_semantic<xindex_view<CT, I>>;
+    };
+
+    /***************
+     * xfiltration *
+     ***************/
+
+    /**
+     * @class xfiltration
+     * @brief Filter of a xexpression for fast scalar assign.
+     *
+     * The xfiltration class implements a lazy filtration of a multidimensional
+     * \ref xexpression, optimized for scalar and computed scalar assignments.
+     * Actually, the \ref xfiltration class IS NOT an \ref xexpression and the
+     * scalar and computed scalar assignments are the only method it provides.
+     * The filtering condition is not evaluated until the filtration is assigned.
+     *
+     * xfiltration is not meant to be used directly, but only with the \ref filtration
+     * helper function.
+     *
+     * @tparam ECT the closure type of the \ref xexpression type underlying this filtration
+     * @tparam CCT the closure type of the filtering \ref xexpression type
+     *
+     * @sa filtration
+     */
+    template <class ECT, class CCT>
+    class xfiltration
+    {
+    public:
+
+        using self_type = xfiltration<ECT, CCT>;
+        using xexpression_type = std::decay_t<ECT>;
+        using const_reference = typename xexpression_type::const_reference;
+
+        template <class ECTA, class CCTA>
+        xfiltration(ECTA&& e, CCTA&& condition);
+
+        template <class E>
+        disable_xexpression<E, self_type&> operator=(const E&);
+
+        template <class E>
+        disable_xexpression<E, self_type&> operator+=(const E&);
+
+        template <class E>
+        disable_xexpression<E, self_type&> operator-=(const E&);
+
+        template <class E>
+        disable_xexpression<E, self_type&> operator*=(const E&);
+
+        template <class E>
+        disable_xexpression<E, self_type&> operator/=(const E&);
+
+        template <class E>
+        disable_xexpression<E, self_type&> operator%=(const E&);
+
+    private:
+
+        // Applies 'func(value, condition)' element-wise, writing back in place.
+        template <class F>
+        self_type& apply(F&& func);
+
+        ECT m_e;          // closure on the filtered expression
+        CCT m_condition;  // closure on the boolean condition expression
+    };
+
+    /******************************
+     * xindex_view implementation *
+     ******************************/
+
+    /**
+     * @name Constructor
+     */
+    //@{
+    /**
+     * Constructs an xindex_view, selecting the indices specified by \a indices.
+     * The resulting xexpression has a 1D shape with a length of n for n indices.
+     *
+     * @param e the underlying xexpression for this view
+     * @param indices the indices to select
+     */
+    template <class CT, class I>
+    template <class CTA, class I2>
+    inline xindex_view<CT, I>::xindex_view(CTA&& e, I2&& indices) noexcept
+        : m_e(std::forward<CTA>(e))
+        , m_indices(std::forward<I2>(indices))
+        , m_shape({m_indices.size()})
+    {
+    }
+
+    //@}
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended assignment operator.
+     */
+    template <class CT, class I>
+    template <class E>
+    inline auto xindex_view<CT, I>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        return semantic_base::operator=(e);
+    }
+
+    //@}
+
+    /**
+     * Scalar assignment: broadcasts the scalar \c e to all selected elements.
+     */
+    template <class CT, class I>
+    template <class E>
+    inline auto xindex_view<CT, I>::operator=(const E& e) -> disable_xexpression<E, self_type>&
+    {
+        std::fill(this->begin(), this->end(), e);
+        return *this;
+    }
+
+    // Called by the view semantic when the right-hand side was materialized
+    // into a temporary before being assigned to this view.
+    template <class CT, class I>
+    inline void xindex_view<CT, I>::assign_temporary_impl(temporary_type&& tmp)
+    {
+        std::copy(tmp.cbegin(), tmp.cend(), this->begin());
+    }
+
+    /**
+     * @name Size and shape
+     */
+    //@{
+    /**
+     * Returns the size of the xindex_view.
+     */
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::size() const noexcept -> size_type
+    {
+        return compute_size(shape());
+    }
+
+    /**
+     * Returns the number of dimensions of the xindex_view (always 1).
+     */
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::dimension() const noexcept -> size_type
+    {
+        return 1;
+    }
+
+    /**
+     * Returns the shape of the xindex_view.
+     */
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::shape() const noexcept -> const inner_shape_type&
+    {
+        return m_shape;
+    }
+
+    /**
+     * Returns the i-th dimension of the expression.
+     */
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::shape(size_type i) const -> size_type
+    {
+        return m_shape[i];
+    }
+
+    // Element order depends on the index array: always dynamic layout.
+    template <class CT, class I>
+    inline layout_type xindex_view<CT, I>::layout() const noexcept
+    {
+        return static_layout;
+    }
+
+    template <class CT, class I>
+    inline bool xindex_view<CT, I>::is_contiguous() const noexcept
+    {
+        return false;
+    }
+
+    //@}
+
+    /**
+     * @name Data
+     */
+    //@{
+
+    /**
+     * Fills the view with the given value.
+     * @param value the value to fill the view with.
+     */
+    template <class CT, class I>
+    template <class T>
+    inline void xindex_view<CT, I>::fill(const T& value)
+    {
+        std::fill(this->begin(), this->end(), value);
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the xindex_view.
+     * @param idx index specifying the position in the index_view. More indices may be provided,
+     * only the last one will be used.
+     */
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::operator()(size_type idx) -> reference
+    {
+        return m_e[m_indices[idx]];
+    }
+
+    // Extra leading indices are discarded: indices are peeled off from the
+    // left until only the last one remains, which selects into m_indices.
+    template <class CT, class I>
+    template <class... Args>
+    inline auto xindex_view<CT, I>::operator()(size_type, size_type idx1, Args... args) -> reference
+    {
+        return this->operator()(idx1, static_cast<size_type>(args)...);
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the xindex_view.
+     * @param idx index specifying the position in the index_view.
+     */
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::unchecked(size_type idx) -> reference
+    {
+        return this->operator()(idx);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the xindex_view.
+     * @param idx index specifying the position in the index_view. More indices may be provided,
+     * only the last one will be used.
+     */
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::operator()(size_type idx) const -> const_reference
+    {
+        return m_e[m_indices[idx]];
+    }
+
+    // Extra leading indices are discarded, mirroring the non-const overload.
+    // The variadic indices are explicitly cast to size_type for consistency
+    // with the non-const overload and to avoid implicit conversion warnings
+    // when callers pass signed or narrower index types.
+    template <class CT, class I>
+    template <class... Args>
+    inline auto xindex_view<CT, I>::operator()(size_type, size_type idx1, Args... args) const -> const_reference
+    {
+        return this->operator()(idx1, static_cast<size_type>(args)...);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the xindex_view.
+     * @param idx index specifying the position in the index_view.
+     */
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::unchecked(size_type idx) const -> const_reference
+    {
+        return this->operator()(idx);
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the container.
+     * @param index a sequence of indices specifying the position in the container. Indices
+     * must be unsigned integers, the number of indices in the list should be equal or greater
+     * than the number of dimensions of the container.
+     */
+    template <class CT, class I>
+    template <class S>
+    inline auto xindex_view<CT, I>::operator[](const S& index) -> disable_integral_t<S, reference>
+    {
+        // Only the first entry of the multi-index is used: the view is 1-D.
+        return m_e[m_indices[index[0]]];
+    }
+
+    template <class CT, class I>
+    template <class OI>
+    inline auto xindex_view<CT, I>::operator[](std::initializer_list<OI> index) -> reference
+    {
+        return m_e[m_indices[*(index.begin())]];
+    }
+
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::operator[](size_type i) -> reference
+    {
+        return operator()(i);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the container.
+     * @param index a sequence of indices specifying the position in the container. Indices
+     * must be unsigned integers, the number of indices in the list should be equal or greater
+     * than the number of dimensions of the container.
+     */
+    template <class CT, class I>
+    template <class S>
+    inline auto xindex_view<CT, I>::operator[](const S& index) const -> disable_integral_t<S, const_reference>
+    {
+        return m_e[m_indices[index[0]]];
+    }
+
+    template <class CT, class I>
+    template <class OI>
+    inline auto xindex_view<CT, I>::operator[](std::initializer_list<OI> index) const -> const_reference
+    {
+        return m_e[m_indices[*(index.begin())]];
+    }
+
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::operator[](size_type i) const -> const_reference
+    {
+        return operator()(i);
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the xindex_view.
+     * @param first iterator starting the sequence of indices; only the first
+     * index is used, the end iterator is ignored.
+     * The number of indices in the sequence should be equal to or greater 1.
+     */
+    template <class CT, class I>
+    template <class It>
+    inline auto xindex_view<CT, I>::element(It first, It /*last*/) -> reference
+    {
+        return m_e[m_indices[(*first)]];
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the xindex_view.
+     * @param first iterator starting the sequence of indices; only the first
+     * index is used, the end iterator is ignored.
+     * The number of indices in the sequence should be equal to or greater 1.
+     */
+    template <class CT, class I>
+    template <class It>
+    inline auto xindex_view<CT, I>::element(It first, It /*last*/) const -> const_reference
+    {
+        return m_e[m_indices[(*first)]];
+    }
+
+    /**
+     * Returns a reference to the underlying expression of the view.
+     */
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::expression() noexcept -> xexpression_type&
+    {
+        return m_e;
+    }
+
+    /**
+     * Returns a constant reference to the underlying expression of the view.
+     */
+    template <class CT, class I>
+    inline auto xindex_view<CT, I>::expression() const noexcept -> const xexpression_type&
+    {
+        return m_e;
+    }
+
+    //@}
+
+    /**
+     * @name Broadcasting
+     */
+    //@{
+    /**
+     * Broadcast the shape of the xindex_view to the specified parameter.
+     * @param shape the result shape
+     * @param reuse_cache parameter for internal optimization
+     * @return a boolean indicating whether the broadcasting is trivial
+     */
+    template <class CT, class I>
+    template <class O>
+    inline bool xindex_view<CT, I>::broadcast_shape(O& shape, bool) const
+    {
+        return xt::broadcast_shape(m_shape, shape);
+    }
+
+    /**
+     * Checks whether the xindex_view can be linearly assigned to an expression
+     * with the specified strides.
+     * @return a boolean indicating whether a linear assign is possible
+     */
+    template <class CT, class I>
+    template <class O>
+    inline bool xindex_view<CT, I>::has_linear_assign(const O& /*strides*/) const noexcept
+    {
+        // Elements are scattered by the index array, so assignment can never
+        // be performed linearly, whatever the target strides are.
+        return false;
+    }
+
+    //@}
+
+    /***************
+     * stepper api *
+     ***************/
+
+    template <class CT, class I>
+    template <class ST>
+    inline auto xindex_view<CT, I>::stepper_begin(const ST& shape) -> stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return stepper(this, offset);
+    }
+
+    template <class CT, class I>
+    template <class ST>
+    inline auto xindex_view<CT, I>::stepper_end(const ST& shape, layout_type) -> stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return stepper(this, offset, true);
+    }
+
+    template <class CT, class I>
+    template <class ST>
+    inline auto xindex_view<CT, I>::stepper_begin(const ST& shape) const -> const_stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return const_stepper(this, offset);
+    }
+
+    template <class CT, class I>
+    template <class ST>
+    inline auto xindex_view<CT, I>::stepper_end(const ST& shape, layout_type) const -> const_stepper
+    {
+        size_type offset = shape.size() - dimension();
+        return const_stepper(this, offset, true);
+    }
+
+    // Rebinds the view to another expression, copying the index array.
+    template <class CT, class I>
+    template <class E>
+    inline auto xindex_view<CT, I>::build_index_view(E&& e) const -> rebind_t<E>
+    {
+        return rebind_t<E>(std::forward<E>(e), indices_type(m_indices));
+    }
+
+    /******************************
+     * xfiltration implementation *
+     ******************************/
+
+    /**
+     * @name Constructor
+     */
+    //@{
+    /**
+     * Constructs a xfiltration on the given expression \c e, selecting
+     * the elements matching the specified \c condition.
+     *
+     * @param e the \ref xexpression to filter.
+     * @param condition the filtering \ref xexpression to apply.
+     */
+    template <class ECT, class CCT>
+    template <class ECTA, class CCTA>
+    inline xfiltration<ECT, CCT>::xfiltration(ECTA&& e, CCTA&& condition)
+        : m_e(std::forward<ECTA>(e))
+        , m_condition(std::forward<CCTA>(condition))
+    {
+    }
+
+    //@}
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * Assigns the scalar \c e to \c *this.
+     * @param e the scalar to assign.
+     * @return a reference to \ *this.
+     */
+    template <class ECT, class CCT>
+    template <class E>
+    inline auto xfiltration<ECT, CCT>::operator=(const E& e) -> disable_xexpression<E, self_type&>
+    {
+        return apply(
+            [this, &e](const_reference v, bool cond)
+            {
+                return cond ? e : v;
+            }
+        );
+    }
+
+    //@}
+
+    /**
+     * @name Computed assignement
+     */
+    //@{
+    /**
+     * Adds the scalar \c e to \c *this.
+     * @param e the scalar to add.
+     * @return a reference to \c *this.
+     */
+    template <class ECT, class CCT>
+    template <class E>
+    inline auto xfiltration<ECT, CCT>::operator+=(const E& e) -> disable_xexpression<E, self_type&>
+    {
+        return apply(
+            [&e](const_reference v, bool cond)
+            {
+                return cond ? v + e : v;
+            }
+        );
+    }
+
+    /**
+     * Subtracts the scalar \c e from \c *this.
+     * @param e the scalar to subtract.
+     * @return a reference to \c *this.
+     */
+    template <class ECT, class CCT>
+    template <class E>
+    inline auto xfiltration<ECT, CCT>::operator-=(const E& e) -> disable_xexpression<E, self_type&>
+    {
+        return apply(
+            [&e](const_reference v, bool cond)
+            {
+                return cond ? v - e : v;
+            }
+        );
+    }
+
+    /**
+     * Multiplies \c *this with the scalar \c e.
+     * @param e the scalar involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class ECT, class CCT>
+    template <class E>
+    inline auto xfiltration<ECT, CCT>::operator*=(const E& e) -> disable_xexpression<E, self_type&>
+    {
+        return apply(
+            [&e](const_reference v, bool cond)
+            {
+                return cond ? v * e : v;
+            }
+        );
+    }
+
+    /**
+     * Divides \c *this by the scalar \c e.
+     * @param e the scalar involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class ECT, class CCT>
+    template <class E>
+    inline auto xfiltration<ECT, CCT>::operator/=(const E& e) -> disable_xexpression<E, self_type&>
+    {
+        // Division is only applied where the filter condition holds.
+        return apply(
+            [&e](const_reference v, bool cond)
+            {
+                return cond ? v / e : v;
+            }
+        );
+    }
+
+    /**
+     * Computes the remainder of \c *this after division by the scalar \c e.
+     * @param e the scalar involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class ECT, class CCT>
+    template <class E>
+    inline auto xfiltration<ECT, CCT>::operator%=(const E& e) -> disable_xexpression<E, self_type&>
+    {
+        // Modulus is only applied where the filter condition holds
+        // (requires an integral value_type, as v % e must be well-formed).
+        return apply(
+            [&e](const_reference v, bool cond)
+            {
+                return cond ? v % e : v;
+            }
+        );
+    }
+
+    template <class ECT, class CCT>
+    template <class F>
+    inline auto xfiltration<ECT, CCT>::apply(F&& func) -> self_type&
+    {
+        // Walks the filtered expression and the condition expression in
+        // lockstep and stores func(value, condition) back into the
+        // expression in place.
+        std::transform(m_e.cbegin(), m_e.cend(), m_condition.cbegin(), m_e.begin(), func);
+        return *this;
+    }
+
+    /**
+     * @brief creates an indexview from a container of indices.
+     *
+     * Returns a 1D view with the elements at \a indices selected.
+     *
+     * @param e the underlying xexpression
+     * @param indices the indices to select
+     *
+     * @code{.cpp}
+     * xarray<double> a = {{1,5,3}, {4,5,6}};
+     * b = index_view(a, {{0, 0}, {1, 0}, {1, 1}});
+     * std::cout << b << std::endl; // {1, 4, 5}
+     * b += 100;
+     * std::cout << a << std::endl; // {{101, 5, 3}, {104, 105, 6}}
+     * @endcode
+     */
+    template <class E, class I>
+    inline auto index_view(E&& e, I&& indices) noexcept
+    {
+        // xclosure_t keeps a reference for lvalue expressions and moves
+        // rvalues into the view, so the view never dangles.
+        using view_type = xindex_view<xclosure_t<E>, std::decay_t<I>>;
+        return view_type(std::forward<E>(e), std::forward<I>(indices));
+    }
+
+    // Overload for a braced list of indices: the C array is converted to a
+    // std::array so the returned view owns its index container.
+    template <class E, std::size_t L>
+    inline auto index_view(E&& e, const xindex (&indices)[L]) noexcept
+    {
+        using view_type = xindex_view<xclosure_t<E>, std::array<xindex, L>>;
+        return view_type(std::forward<E>(e), xt::to_array(indices));
+    }
+
+    /**
+     * @brief creates a view into \a e filtered by \a condition.
+     *
+     * Returns a 1D view with the elements selected where \a condition evaluates to \em true.
+     * This is equivalent to \verbatim{index_view(e, argwhere(condition));}\endverbatim
+     * The returned view is not optimal if you just want to assign a scalar to the filtered
+     * elements. In that case, you should consider using the \ref filtration function
+     * instead.
+     *
+     * @tparam L the traversal order
+     * @param e the underlying xexpression
+     * @param condition xexpression with shape of \a e which selects indices
+     *
+     * @code{.cpp}
+     * xarray<double> a = {{1,5,3}, {4,5,6}};
+     * b = filter(a, a >= 5);
+     * std::cout << b << std::endl; // {5, 5, 6}
+     * @endcode
+     *
+     * \sa filtration
+     */
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E, class O>
+    inline auto filter(E&& e, O&& condition) noexcept
+    {
+        // The indices are computed eagerly via argwhere; only the resulting
+        // index container is stored in the returned view.
+        auto indices = argwhere<L>(std::forward<O>(condition));
+        using view_type = xindex_view<xclosure_t<E>, decltype(indices)>;
+        return view_type(std::forward<E>(e), std::move(indices));
+    }
+
+    /**
+     * @brief creates a filtration of \c e filtered by \a condition.
+     *
+     * Returns a lazy filtration optimized for scalar assignment.
+     * Actually, scalar assignment and computed scalar assignments
+     * are the only available methods of the filtration, the filtration
+     * IS NOT an \ref xexpression.
+     *
+     * @param e the \ref xexpression to filter
+     * @param condition the filtering \ref xexpression
+     *
+     * @code{.cpp}
+     * xarray<double> a = {{1,5,3}, {4,5,6}};
+     * filtration(a, a >= 5) += 2;
+     * std::cout << a << std::endl; // {{1, 7, 3}, {4, 7, 8}}
+     * @endcode
+     */
+    template <class E, class C>
+    inline auto filtration(E&& e, C&& condition) noexcept
+    {
+        // Lazy: the condition is evaluated element-wise during each
+        // (computed) scalar assignment instead of materializing indices.
+        using filtration_type = xfiltration<xclosure_t<E>, xclosure_t<C>>;
+        return filtration_type(std::forward<E>(e), std::forward<C>(condition));
+    }
+}
+
+#endif

+ 142 - 0
3rd/numpy/include/xtensor/xinfo.hpp

@@ -0,0 +1,142 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_INFO_HPP
+#define XTENSOR_INFO_HPP
+
+#include <string>
+
+#include "xlayout.hpp"
+
+// CONSTEXPR11_TN / CONSTEXPR14_TN / NOEXCEPT_TN expand to constexpr /
+// noexcept only when the active standard (or MSVC version) supports them,
+// so the declarations below stay compilable in pre-C++11/14 modes.
+#ifndef _MSC_VER
+#if __cplusplus < 201103
+#define CONSTEXPR11_TN
+#define CONSTEXPR14_TN
+#define NOEXCEPT_TN
+#elif __cplusplus < 201402
+#define CONSTEXPR11_TN constexpr
+#define CONSTEXPR14_TN
+#define NOEXCEPT_TN noexcept
+#else
+#define CONSTEXPR11_TN constexpr
+#define CONSTEXPR14_TN constexpr
+#define NOEXCEPT_TN noexcept
+#endif
+#else  // _MSC_VER
+#if _MSC_VER < 1900
+#define CONSTEXPR11_TN
+#define CONSTEXPR14_TN
+#define NOEXCEPT_TN
+#elif _MSC_VER < 2000
+#define CONSTEXPR11_TN constexpr
+#define CONSTEXPR14_TN
+#define NOEXCEPT_TN noexcept
+#else
+#define CONSTEXPR11_TN constexpr
+#define CONSTEXPR14_TN constexpr
+#define NOEXCEPT_TN noexcept
+#endif
+#endif
+
+namespace xt
+{
+    // Non-owning compile-time string view (pointer + length).
+    // see http://stackoverflow.com/a/20170989
+    struct static_string
+    {
+        template <std::size_t N>
+        explicit CONSTEXPR11_TN static_string(const char (&a)[N]) NOEXCEPT_TN : data(a),
+                                                                                size(N - 1)
+        {
+        }
+
+        CONSTEXPR11_TN static_string(const char* a, const std::size_t sz) NOEXCEPT_TN : data(a),
+                                                                                        size(sz)
+        {
+        }
+
+        const char* const data;
+        const std::size_t size;
+    };
+
+    // Returns the human-readable name of T by slicing the compiler-provided
+    // pretty-function string. The numeric offsets are magic numbers tied to
+    // the exact prefix/suffix each compiler emits for this signature; they
+    // must be revisited whenever a compiler changes that format.
+    template <class T>
+    CONSTEXPR14_TN static_string type_name()
+    {
+#ifdef __clang__
+        static_string p(__PRETTY_FUNCTION__);
+        return static_string(p.data + 39, p.size - 39 - 1);
+#elif defined(__GNUC__)
+        static_string p(__PRETTY_FUNCTION__);
+#if __cplusplus < 201402
+        return static_string(p.data + 36, p.size - 36 - 1);
+#else
+        return static_string(p.data + 54, p.size - 54 - 1);
+#endif
+#elif defined(_MSC_VER)
+        static const static_string p(__FUNCSIG__);
+        return static_string(p.data + 47, p.size - 47 - 7);
+#endif
+    }
+
+    // Convenience wrapper converting the static_string of type_name<T>()
+    // into an owning std::string.
+    template <class T>
+    std::string type_to_string()
+    {
+        static_string static_name = type_name<T>();
+        return std::string(static_name.data, static_name.size);
+    }
+
+    // Builds a multi-line, human-readable summary of a container-like
+    // expression: value type, layout, shape, strides and size.
+    template <class T>
+    std::string info(const T& t)
+    {
+        std::string s;
+        s += "\nValue type: " + type_to_string<typename T::value_type>();
+        s += "\nLayout: ";
+        if (t.layout() == layout_type::row_major)
+        {
+            s += "row_major";
+        }
+        else if (t.layout() == layout_type::column_major)
+        {
+            s += "column_major";
+        }
+        else if (t.layout() == layout_type::dynamic)
+        {
+            s += "dynamic";
+        }
+        else
+        {
+            s += "any";
+        }
+        s += "\nShape: (";
+        bool first = true;
+        for (const auto& el : t.shape())
+        {
+            if (!first)
+            {
+                s += ", ";
+            }
+            first = false;
+            s += std::to_string(el);
+        }
+        s += ")\nStrides: (";
+        first = true;
+        for (const auto& el : t.strides())
+        {
+            if (!first)
+            {
+                s += ", ";
+            }
+            first = false;
+            s += std::to_string(el);
+        }
+        s += ")\nSize: " + std::to_string(t.size()) + "\n";
+        return s;
+    }
+}
+
+#endif

+ 832 - 0
3rd/numpy/include/xtensor/xio.hpp

@@ -0,0 +1,832 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_IO_HPP
+#define XTENSOR_IO_HPP
+
+#include <complex>
+#include <cstddef>
+#include <iomanip>
+#include <iostream>
+#include <numeric>
+#include <sstream>
+#include <string>
+
+#include "xexpression.hpp"
+#include "xmath.hpp"
+#include "xstrided_view.hpp"
+
+namespace xt
+{
+
+    template <class E>
+    inline std::ostream& operator<<(std::ostream& out, const xexpression<E>& e);
+
+    /*****************
+     * print options *
+     *****************/
+
+    namespace print_options
+    {
+        struct print_options_impl
+        {
+            int edge_items = 3;
+            int line_width = 75;
+            int threshold = 1000;
+            int precision = -1;  // default precision
+        };
+
+        // Process-wide print options singleton (function-local static).
+        inline print_options_impl& print_options()
+        {
+            static print_options_impl po;
+            return po;
+        }
+
+        /**
+         * @brief Sets the line width. After \a line_width chars,
+         *        a new line is added.
+         *
+         * @param line_width The line width
+         */
+        inline void set_line_width(int line_width)
+        {
+            print_options().line_width = line_width;
+        }
+
+        /**
+         * @brief Sets the threshold after which summarization is triggered (default: 1000).
+         *
+         * @param threshold The number of elements in the xexpression that triggers
+         *                  summarization in the output
+         */
+        inline void set_threshold(int threshold)
+        {
+            print_options().threshold = threshold;
+        }
+
+        /**
+         * @brief Sets the number of edge items. If the summarization is
+         *        triggered, this value defines how many items of each dimension
+         *        are printed.
+         *
+         * @param edge_items The number of edge items
+         */
+        inline void set_edge_items(int edge_items)
+        {
+            print_options().edge_items = edge_items;
+        }
+
+        /**
+         * @brief Sets the precision for printing floating point values.
+         *
+         * @param precision The number of digits for floating point output
+         */
+        inline void set_precision(int precision)
+        {
+            print_options().precision = precision;
+        }
+
+// Defines an io manipulator class NAME that stores an int in the stream's
+// iword slot; the slot index is allocated once per NAME via
+// std::ios_base::xalloc() and shared by all streams.
+#define DEFINE_LOCAL_PRINT_OPTION(NAME)                               \
+    class NAME                                                        \
+    {                                                                 \
+    public:                                                           \
+                                                                      \
+        NAME(int value)                                               \
+            : m_value(value)                                          \
+        {                                                             \
+            id();                                                     \
+        }                                                             \
+        static int id()                                               \
+        {                                                             \
+            static int id = std::ios_base::xalloc();                  \
+            return id;                                                \
+        }                                                             \
+        int value() const                                             \
+        {                                                             \
+            return m_value;                                           \
+        }                                                             \
+                                                                      \
+    private:                                                          \
+                                                                      \
+        int m_value;                                                  \
+    };                                                                \
+                                                                      \
+    inline std::ostream& operator<<(std::ostream& out, const NAME& n) \
+    {                                                                 \
+        out.iword(NAME::id()) = n.value();                            \
+        return out;                                                   \
+    }
+
+        /**
+         * @class line_width
+         *
+         * io manipulator used to set the width of the lines when printing
+         * an expression.
+         *
+         * @code{.cpp}
+         * using po = xt::print_options;
+         * xt::xarray<double> a = {{1, 2, 3}, {4, 5, 6}};
+         * std::cout << po::line_width(100) << a << std::endl;
+         * @endcode
+         */
+        DEFINE_LOCAL_PRINT_OPTION(line_width)
+
+        /**
+         * @class threshold
+         *
+         * io manipulator used to set the threshold after which summarization is
+         * triggered.
+         *
+         * @code{.cpp}
+         * using po = xt::print_options;
+         * xt::xarray<double> a = xt::rand::randn<double>({2000, 500});
+         * std::cout << po::threshold(50) << a << std::endl;
+         * @endcode
+         */
+        DEFINE_LOCAL_PRINT_OPTION(threshold)
+
+        /**
+         * @class edge_items
+         *
+         * io manipulator used to set the number of edge items if
+         * the summarization is triggered.
+         *
+         * @code{.cpp}
+         * using po = xt::print_options;
+         * xt::xarray<double> a = xt::rand::randn<double>({2000, 500});
+         * std::cout << po::edge_items(5) << a << std::endl;
+         * @endcode
+         */
+        DEFINE_LOCAL_PRINT_OPTION(edge_items)
+
+        /**
+         * @class precision
+         *
+         * io manipulator used to set the precision of the floating point values
+         * when printing an expression.
+         *
+         * @code{.cpp}
+         * using po = xt::print_options;
+         * xt::xarray<double> a = xt::rand::randn<double>({2000, 500});
+         * std::cout << po::precision(5) << a << std::endl;
+         * @endcode
+         */
+        DEFINE_LOCAL_PRINT_OPTION(precision)
+    }
+
+    /**************************************
+     * xexpression ostream implementation *
+     **************************************/
+
+    namespace detail
+    {
+        // Recursively streams the expression one dimension at a time;
+        // `slices` holds the index prefix that selects the current sub-view.
+        // `printer` must already have been fed every element (see
+        // recurser_run) so that elements print with a uniform width.
+        template <class E, class F>
+        std::ostream& xoutput(
+            std::ostream& out,
+            const E& e,
+            xstrided_slice_vector& slices,
+            F& printer,
+            std::size_t blanks,
+            std::streamsize element_width,
+            std::size_t edgeitems,
+            std::size_t line_width
+        )
+        {
+            using size_type = typename E::size_type;
+
+            const auto view = xt::strided_view(e, slices);
+            if (view.dimension() == 0)
+            {
+                // Fully indexed: print the scalar element.
+                printer.print_next(out);
+            }
+            else
+            {
+                std::string indents(blanks, ' ');
+
+                size_type i = 0;
+                size_type elems_on_line = 0;
+                // Max elements per line, assuming each takes width + ", ".
+                const size_type ewp2 = static_cast<size_type>(element_width) + size_type(2);
+                const size_type line_lim = static_cast<size_type>(std::floor(line_width / ewp2));
+
+                out << '{';
+                for (; i != size_type(view.shape()[0] - 1); ++i)
+                {
+                    // Summarize: skip from edgeitems to shape[0] - edgeitems.
+                    if (edgeitems && size_type(view.shape()[0]) > (edgeitems * 2) && i == edgeitems)
+                    {
+                        if (view.dimension() == 1 && line_lim != 0 && elems_on_line >= line_lim)
+                        {
+                            out << " ...,";
+                        }
+                        else if (view.dimension() > 1)
+                        {
+                            elems_on_line = 0;
+                            out << "...," << std::endl << indents;
+                        }
+                        else
+                        {
+                            out << "..., ";
+                        }
+                        i = size_type(view.shape()[0]) - edgeitems;
+                    }
+                    if (view.dimension() == 1 && line_lim != 0 && elems_on_line >= line_lim)
+                    {
+                        out << std::endl << indents;
+                        elems_on_line = 0;
+                    }
+                    slices.push_back(static_cast<int>(i));
+                    xoutput(out, e, slices, printer, blanks + 1, element_width, edgeitems, line_width) << ',';
+                    slices.pop_back();
+                    elems_on_line++;
+
+                    if ((view.dimension() == 1) && !(line_lim != 0 && elems_on_line >= line_lim))
+                    {
+                        out << ' ';
+                    }
+                    else if (view.dimension() > 1)
+                    {
+                        out << std::endl << indents;
+                    }
+                }
+                if (view.dimension() == 1 && line_lim != 0 && elems_on_line >= line_lim)
+                {
+                    out << std::endl << indents;
+                }
+                // Last element of this dimension closes the brace.
+                slices.push_back(static_cast<int>(i));
+                xoutput(out, e, slices, printer, blanks + 1, element_width, edgeitems, line_width) << '}';
+                slices.pop_back();
+            }
+            return out;
+        }
+
+        // First pass of pretty printing: feeds every element that will be
+        // displayed (honoring the summarization limit `lim`) to fn.update()
+        // so the printer can compute a common element width.
+        template <class F, class E>
+        void recurser_run(F& fn, const E& e, xstrided_slice_vector& slices, std::size_t lim = 0)
+        {
+            using size_type = typename E::size_type;
+            const auto view = strided_view(e, slices);
+            if (view.dimension() == 0)
+            {
+                fn.update(view());
+            }
+            else
+            {
+                size_type i = 0;
+                for (; i != static_cast<size_type>(view.shape()[0] - 1); ++i)
+                {
+                    // Mirror xoutput's summarization: skip the middle items.
+                    if (lim && size_type(view.shape()[0]) > (lim * 2) && i == lim)
+                    {
+                        i = static_cast<size_type>(view.shape()[0]) - lim;
+                    }
+                    slices.push_back(static_cast<int>(i));
+                    recurser_run(fn, e, slices, lim);
+                    slices.pop_back();
+                }
+                slices.push_back(static_cast<int>(i));
+                recurser_run(fn, e, slices, lim);
+                slices.pop_back();
+            }
+        }
+
+        // Element printer, specialized on the expression's value_type.
+        // Protocol: update() is called once per displayed element (first
+        // pass), then init(), then print_next() once per element in the
+        // same order (second pass).
+        template <class T, class E = void>
+        struct printer;
+
+        // Floating-point specialization: caches all values, decides between
+        // fixed and scientific notation, and computes a uniform width.
+        template <class T>
+        struct printer<T, std::enable_if_t<std::is_floating_point<typename T::value_type>::value>>
+        {
+            using value_type = std::decay_t<typename T::value_type>;
+            using cache_type = std::vector<value_type>;
+            using cache_iterator = typename cache_type::const_iterator;
+
+            explicit printer(std::streamsize precision)
+                : m_precision(precision)
+            {
+            }
+
+            void init()
+            {
+                m_precision = m_required_precision < m_precision ? m_required_precision : m_precision;
+                m_it = m_cache.cbegin();
+                if (m_scientific)
+                {
+                    // 3 = sign, number and dot and 4 = "e+00"
+                    m_width = m_precision + 7;
+                    if (m_large_exponent)
+                    {
+                        // = e+000 (additional number)
+                        m_width += 1;
+                    }
+                }
+                else
+                {
+                    std::streamsize decimals = 1;  // print a leading 0
+                    if (std::floor(m_max) != 0)
+                    {
+                        decimals += std::streamsize(std::log10(std::floor(m_max)));
+                    }
+                    // 2 => sign and dot
+                    m_width = 2 + decimals + m_precision;
+                }
+                if (!m_required_precision)
+                {
+                    --m_width;
+                }
+            }
+
+            std::ostream& print_next(std::ostream& out)
+            {
+                if (!m_scientific)
+                {
+                    std::stringstream buf;
+                    buf.width(m_width);
+                    buf << std::fixed;
+                    buf.precision(m_precision);
+                    buf << (*m_it);
+                    if (!m_required_precision && !std::isinf(*m_it) && !std::isnan(*m_it))
+                    {
+                        buf << '.';
+                    }
+                    std::string res = buf.str();
+                    // Blank out trailing zeros so columns stay aligned.
+                    auto sit = res.rbegin();
+                    while (*sit == '0')
+                    {
+                        *sit = ' ';
+                        ++sit;
+                    }
+                    out << res;
+                }
+                else
+                {
+                    if (!m_large_exponent)
+                    {
+                        out << std::scientific;
+                        out.width(m_width);
+                        out << (*m_it);
+                    }
+                    else
+                    {
+                        // Normalize 3-digit exponents across platforms.
+                        std::stringstream buf;
+                        buf.width(m_width);
+                        buf << std::scientific;
+                        buf.precision(m_precision);
+                        buf << (*m_it);
+                        std::string res = buf.str();
+
+                        if (res[res.size() - 4] == 'e')
+                        {
+                            res.erase(0, 1);
+                            res.insert(res.size() - 2, "0");
+                        }
+                        out << res;
+                    }
+                }
+                ++m_it;
+                return out;
+            }
+
+            void update(const value_type& val)
+            {
+                if (val != 0 && !std::isinf(val) && !std::isnan(val))
+                {
+                    if (!m_scientific || !m_large_exponent)
+                    {
+                        int exponent = 1 + int(std::log10(math::abs(val)));
+                        if (exponent <= -5 || exponent > 7)
+                        {
+                            m_scientific = true;
+                            m_required_precision = m_precision;
+                            if (exponent <= -100 || exponent >= 100)
+                            {
+                                m_large_exponent = true;
+                            }
+                        }
+                    }
+                    if (math::abs(val) > m_max)
+                    {
+                        m_max = math::abs(val);
+                    }
+                    if (m_required_precision < m_precision)
+                    {
+                        // Grow precision until val is represented exactly.
+                        while (std::floor(val * std::pow(10, m_required_precision))
+                               != val * std::pow(10, m_required_precision))
+                        {
+                            m_required_precision++;
+                        }
+                    }
+                }
+                m_cache.push_back(val);
+            }
+
+            std::streamsize width()
+            {
+                return m_width;
+            }
+
+        private:
+
+            bool m_large_exponent = false;
+            bool m_scientific = false;
+            std::streamsize m_width = 9;
+            std::streamsize m_precision;
+            std::streamsize m_required_precision = 0;
+            value_type m_max = 0;
+
+            cache_type m_cache;
+            cache_iterator m_it;
+        };
+
+        // Integral (non-bool) specialization: the field width is derived
+        // from the largest magnitude seen, plus a sign column if any value
+        // was negative.
+        template <class T>
+        struct printer<
+            T,
+            std::enable_if_t<
+                xtl::is_integral<typename T::value_type>::value && !std::is_same<typename T::value_type, bool>::value>>
+        {
+            using value_type = std::decay_t<typename T::value_type>;
+            using cache_type = std::vector<value_type>;
+            using cache_iterator = typename cache_type::const_iterator;
+
+            explicit printer(std::streamsize)
+            {
+            }
+
+            void init()
+            {
+                m_it = m_cache.cbegin();
+                m_width = 1 + std::streamsize((m_max > 0) ? std::log10(m_max) : 0) + m_sign;
+            }
+
+            std::ostream& print_next(std::ostream& out)
+            {
+                // + enables printing of chars etc. as numbers
+                // TODO should chars be printed as numbers?
+                out.width(m_width);
+                out << +(*m_it);
+                ++m_it;
+                return out;
+            }
+
+            void update(const value_type& val)
+            {
+                if (math::abs(val) > m_max)
+                {
+                    m_max = math::abs(val);
+                }
+                if (xtl::is_signed<value_type>::value && val < 0)
+                {
+                    m_sign = true;
+                }
+                m_cache.push_back(val);
+            }
+
+            std::streamsize width()
+            {
+                return m_width;
+            }
+
+        private:
+
+            std::streamsize m_width;
+            bool m_sign = false;
+            value_type m_max = 0;
+
+            cache_type m_cache;
+            cache_iterator m_it;
+        };
+
+        // Bool specialization: prints fixed-width " true"/"false" tokens.
+        template <class T>
+        struct printer<T, std::enable_if_t<std::is_same<typename T::value_type, bool>::value>>
+        {
+            using value_type = bool;
+            using cache_type = std::vector<bool>;
+            using cache_iterator = typename cache_type::const_iterator;
+
+            explicit printer(std::streamsize)
+            {
+            }
+
+            void init()
+            {
+                m_it = m_cache.cbegin();
+            }
+
+            std::ostream& print_next(std::ostream& out)
+            {
+                if (*m_it)
+                {
+                    out << " true";
+                }
+                else
+                {
+                    out << "false";
+                }
+                // TODO: the following std::setw(5) isn't working correctly on OSX.
+                // out << std::boolalpha << std::setw(m_width) << (*m_it);
+                ++m_it;
+                return out;
+            }
+
+            void update(const value_type& val)
+            {
+                m_cache.push_back(val);
+            }
+
+            std::streamsize width()
+            {
+                return m_width;
+            }
+
+        private:
+
+            std::streamsize m_width = 5;
+
+            cache_type m_cache;
+            cache_iterator m_it;
+        };
+
+        // Complex specialization: delegates to two nested floating-point
+        // printers (real and |imag|) and joins them with the sign of the
+        // imaginary part, appending an 'i' suffix.
+        template <class T>
+        struct printer<T, std::enable_if_t<xtl::is_complex<typename T::value_type>::value>>
+        {
+            using value_type = std::decay_t<typename T::value_type>;
+            using cache_type = std::vector<bool>;
+            using cache_iterator = typename cache_type::const_iterator;
+
+            explicit printer(std::streamsize precision)
+                : real_printer(precision)
+                , imag_printer(precision)
+            {
+            }
+
+            void init()
+            {
+                real_printer.init();
+                imag_printer.init();
+                m_it = m_signs.cbegin();
+            }
+
+            std::ostream& print_next(std::ostream& out)
+            {
+                real_printer.print_next(out);
+                if (*m_it)
+                {
+                    out << "-";
+                }
+                else
+                {
+                    out << "+";
+                }
+                std::stringstream buf;
+                imag_printer.print_next(buf);
+                std::string s = buf.str();
+                if (s[0] == ' ')
+                {
+                    s.erase(0, 1);  // erase space for +/-
+                }
+                // insert j at end of number
+                std::size_t idx = s.find_last_not_of(" ");
+                s.insert(idx + 1, "i");
+                out << s;
+                ++m_it;
+                return out;
+            }
+
+            void update(const value_type& val)
+            {
+                real_printer.update(val.real());
+                imag_printer.update(std::abs(val.imag()));
+                // signbit distinguishes -0.0 from +0.0 as well.
+                m_signs.push_back(std::signbit(val.imag()));
+            }
+
+            std::streamsize width()
+            {
+                return real_printer.width() + imag_printer.width() + 2;
+            }
+
+        private:
+
+            printer<value_type> real_printer, imag_printer;
+            cache_type m_signs;
+            cache_iterator m_it;
+        };
+
+        // Fallback for non-fundamental, non-complex value types: streams
+        // each value into a string cache via operator<< and right-aligns to
+        // the longest string (unless that exceeds 20 characters).
+        template <class T>
+        struct printer<
+            T,
+            std::enable_if_t<
+                !xtl::is_fundamental<typename T::value_type>::value && !xtl::is_complex<typename T::value_type>::value>>
+        {
+            using const_reference = typename T::const_reference;
+            using value_type = std::decay_t<typename T::value_type>;
+            using cache_type = std::vector<std::string>;
+            using cache_iterator = typename cache_type::const_iterator;
+
+            explicit printer(std::streamsize)
+            {
+            }
+
+            void init()
+            {
+                m_it = m_cache.cbegin();
+                if (m_width > 20)
+                {
+                    m_width = 0;
+                }
+            }
+
+            std::ostream& print_next(std::ostream& out)
+            {
+                out.width(m_width);
+                out << *m_it;
+                ++m_it;
+                return out;
+            }
+
+            void update(const_reference val)
+            {
+                std::stringstream buf;
+                buf << val;
+                std::string s = buf.str();
+                if (int(s.size()) > m_width)
+                {
+                    m_width = std::streamsize(s.size());
+                }
+                m_cache.push_back(s);
+            }
+
+            std::streamsize width()
+            {
+                return m_width;
+            }
+
+        private:
+
+            std::streamsize m_width = 0;
+            cache_type m_cache;
+            cache_iterator m_it;
+        };
+
+        // Type-erased per-element formatter used by the
+        // pretty_print(e, func, out) overload.
+        template <class E>
+        struct custom_formatter
+        {
+            using value_type = std::decay_t<typename E::value_type>;
+
+            template <class F>
+            custom_formatter(F&& func)
+                : m_func(func)
+            {
+            }
+
+            std::string operator()(const value_type& val) const
+            {
+                return m_func(val);
+            }
+
+        private:
+
+            std::function<std::string(const value_type&)> m_func;
+        };
+    }
+
+    // Collects the effective print options for this stream. Manipulator
+    // values stored in the stream's iword slots are one-shot: each value is
+    // consumed (reset to 0) after being read; unset options fall back to
+    // the global print_options() defaults.
+    inline print_options::print_options_impl get_print_options(std::ostream& out)
+    {
+        print_options::print_options_impl res;
+        using print_options::edge_items;
+        using print_options::line_width;
+        using print_options::precision;
+        using print_options::threshold;
+
+        res.edge_items = static_cast<int>(out.iword(edge_items::id()));
+        res.line_width = static_cast<int>(out.iword(line_width::id()));
+        res.threshold = static_cast<int>(out.iword(threshold::id()));
+        res.precision = static_cast<int>(out.iword(precision::id()));
+
+        if (!res.edge_items)
+        {
+            res.edge_items = print_options::print_options().edge_items;
+        }
+        else
+        {
+            out.iword(edge_items::id()) = long(0);
+        }
+        if (!res.line_width)
+        {
+            res.line_width = print_options::print_options().line_width;
+        }
+        else
+        {
+            out.iword(line_width::id()) = long(0);
+        }
+        if (!res.threshold)
+        {
+            res.threshold = print_options::print_options().threshold;
+        }
+        else
+        {
+            out.iword(threshold::id()) = long(0);
+        }
+        if (!res.precision)
+        {
+            res.precision = print_options::print_options().precision;
+        }
+        else
+        {
+            out.iword(precision::id()) = long(0);
+        }
+
+        return res;
+    }
+
+    // Pretty-prints e with each element formatted by func: wraps the
+    // expression in a lazy xfunction applying the formatter, then delegates
+    // to the plain pretty_print overload.
+    template <class E, class F>
+    std::ostream& pretty_print(const xexpression<E>& e, F&& func, std::ostream& out = std::cout)
+    {
+        xfunction<detail::custom_formatter<E>, const_xclosure_t<E>> print_fun(
+            detail::custom_formatter<E>(std::forward<F>(func)),
+            e
+        );
+        return pretty_print(print_fun, out);
+    }
+
+    namespace detail
+    {
+        // RAII guard restoring a stream's format flags on scope exit;
+        // pretty_print may switch the stream to std::fixed/std::scientific.
+        template <class S>
+        class fmtflags_guard
+        {
+        public:
+
+            explicit fmtflags_guard(S& stream)
+                : m_stream(stream)
+                , m_flags(stream.flags())
+            {
+            }
+
+            ~fmtflags_guard()
+            {
+                m_stream.flags(m_flags);
+            }
+
+        private:
+
+            S& m_stream;
+            std::ios_base::fmtflags m_flags;
+        };
+    }
+
+    // Pretty-prints an xexpression: a first pass (recurser_run) feeds every
+    // displayed element to a value-type-specific printer so it can compute
+    // a uniform width, a second pass (xoutput) renders the nested braces.
+    template <class E>
+    std::ostream& pretty_print(const xexpression<E>& e, std::ostream& out = std::cout)
+    {
+        detail::fmtflags_guard<std::ostream> guard(out);
+
+        const E& d = e.derived_cast();
+
+        std::size_t lim = 0;
+        std::size_t sz = compute_size(d.shape());
+
+        auto po = get_print_options(out);
+
+        // Summarize with po.edge_items per edge once sz exceeds threshold.
+        if (sz > static_cast<std::size_t>(po.threshold))
+        {
+            lim = static_cast<std::size_t>(po.edge_items);
+        }
+        if (sz == 0)
+        {
+            out << "{}";
+            return out;
+        }
+
+        auto temp_precision = out.precision();
+        auto precision = temp_precision;
+        if (po.precision != -1)
+        {
+            out.precision(static_cast<std::streamsize>(po.precision));
+            precision = static_cast<std::streamsize>(po.precision);
+        }
+
+        detail::printer<E> p(precision);
+
+        xstrided_slice_vector sv;
+        detail::recurser_run(p, d, sv, lim);
+        p.init();
+        sv.clear();
+        xoutput(out, d, sv, p, 1, p.width(), lim, static_cast<std::size_t>(po.line_width));
+
+        out.precision(temp_precision);  // restore precision
+
+        return out;
+    }
+
+    // Stream insertion for any xexpression; all formatting goes through
+    // pretty_print.
+    template <class E>
+    inline std::ostream& operator<<(std::ostream& out, const xexpression<E>& e)
+    {
+        return pretty_print(e, out);
+    }
+}
+#endif
+
+// Backward compatibility: include xmime.hpp in xio.hpp by default.
+
+#ifdef __CLING__
+#include "xmime.hpp"
+#endif

+ 1369 - 0
3rd/numpy/include/xtensor/xiterable.hpp

@@ -0,0 +1,1369 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_ITERABLE_HPP
+#define XTENSOR_ITERABLE_HPP
+
+#include "xiterator.hpp"
+
+namespace xt
+{
+
+    /*******************
+     * xconst_iterable *
+     *******************/
+
+    template <class D>
+    struct xiterable_inner_types;
+
+    /**
+     * @class xconst_iterable
+     * @brief Base class for multidimensional iterable constant expressions
+     *
+     * The xconst_iterable class defines the interface for multidimensional
+     * constant expressions that can be iterated.
+     *
+     * @tparam D The derived type, i.e. the inheriting class for which xconst_iterable
+     *           provides the interface.
+     */
+    template <class D>
+    class xconst_iterable
+    {
+    public:
+
+        using derived_type = D;
+
+        // Inner types are injected by a specialization of
+        // xiterable_inner_types for the derived class (CRTP).
+        using iterable_types = xiterable_inner_types<D>;
+        using inner_shape_type = typename iterable_types::inner_shape_type;
+
+        using stepper = typename iterable_types::stepper;
+        using const_stepper = typename iterable_types::const_stepper;
+
+        // Iterators following the expression's own shape, parameterized by
+        // traversal order L (row-major / column-major).
+        template <layout_type L>
+        using layout_iterator = xiterator<stepper, inner_shape_type*, L>;
+        template <layout_type L>
+        using const_layout_iterator = xiterator<const_stepper, inner_shape_type*, L>;
+        template <layout_type L>
+        using reverse_layout_iterator = std::reverse_iterator<layout_iterator<L>>;
+        template <layout_type L>
+        using const_reverse_layout_iterator = std::reverse_iterator<const_layout_iterator<L>>;
+
+        // "Linear" aliases fix the traversal order to the library default.
+        using linear_iterator = layout_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+        using const_linear_iterator = const_layout_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+        using reverse_linear_iterator = reverse_layout_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+        using const_reverse_linear_iterator = const_reverse_layout_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+
+        // Broadcast iterators iterate as if the expression had shape S
+        // (a caller-provided shape the expression is broadcast to).
+        template <class S, layout_type L>
+        using broadcast_iterator = xiterator<stepper, S, L>;
+        template <class S, layout_type L>
+        using const_broadcast_iterator = xiterator<const_stepper, S, L>;
+        template <class S, layout_type L>
+        using reverse_broadcast_iterator = std::reverse_iterator<broadcast_iterator<S, L>>;
+        template <class S, layout_type L>
+        using const_reverse_broadcast_iterator = std::reverse_iterator<const_broadcast_iterator<S, L>>;
+
+        using iterator = layout_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+        using const_iterator = const_layout_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+        using reverse_iterator = reverse_layout_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+        using const_reverse_iterator = const_reverse_layout_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+
+        // Constant forward iterators over the expression's own shape.
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_layout_iterator<L> begin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_layout_iterator<L> end() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_layout_iterator<L> cbegin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_layout_iterator<L> cend() const noexcept;
+
+        // Constant reverse iterators over the expression's own shape.
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_reverse_layout_iterator<L> rbegin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_reverse_layout_iterator<L> rend() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_reverse_layout_iterator<L> crbegin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        const_reverse_layout_iterator<L> crend() const noexcept;
+
+        // Constant forward iterators broadcast to a caller-provided shape.
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_broadcast_iterator<S, L> begin(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_broadcast_iterator<S, L> end(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_broadcast_iterator<S, L> cbegin(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_broadcast_iterator<S, L> cend(const S& shape) const noexcept;
+
+        // Constant reverse iterators broadcast to a caller-provided shape.
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_reverse_broadcast_iterator<S, L> rbegin(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_reverse_broadcast_iterator<S, L> rend(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_reverse_broadcast_iterator<S, L> crbegin(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_reverse_broadcast_iterator<S, L> crend(const S& shape) const noexcept;
+
+    protected:
+
+        // Shape accessor delegating to the derived class.
+        const inner_shape_type& get_shape() const;
+
+    private:
+
+        // Common iterator factories; end_index selects begin- vs
+        // end-positioning of the underlying xiterator.
+        template <layout_type L>
+        const_layout_iterator<L> get_cbegin(bool end_index) const noexcept;
+        template <layout_type L>
+        const_layout_iterator<L> get_cend(bool end_index) const noexcept;
+
+        template <layout_type L, class S>
+        const_broadcast_iterator<S, L> get_cbegin(const S& shape, bool end_index) const noexcept;
+        template <layout_type L, class S>
+        const_broadcast_iterator<S, L> get_cend(const S& shape, bool end_index) const noexcept;
+
+        // Steppers are obtained from the derived class (CRTP dispatch).
+        template <class S>
+        const_stepper get_stepper_begin(const S& shape) const noexcept;
+        template <class S>
+        const_stepper get_stepper_end(const S& shape, layout_type l) const noexcept;
+
+        const derived_type& derived_cast() const;
+    };
+
+    /*************
+     * xiterable *
+     *************/
+
+    /**
+     * @class xiterable
+     * @brief Base class for multidimensional iterable expressions
+     *
+     * The xiterable class defines the interface for multidimensional
+     * expressions that can be iterated.
+     *
+     * @tparam D The derived type, i.e. the inheriting class for which xiterable
+     *           provides the interface.
+     */
+    template <class D>
+    class xiterable : public xconst_iterable<D>
+    {
+    public:
+
+        using derived_type = D;
+
+        using base_type = xconst_iterable<D>;
+        using inner_shape_type = typename base_type::inner_shape_type;
+
+        using stepper = typename base_type::stepper;
+        using const_stepper = typename base_type::const_stepper;
+
+        using linear_iterator = typename base_type::linear_iterator;
+        using reverse_linear_iterator = typename base_type::reverse_linear_iterator;
+
+        // Re-export the base class' iterator alias templates so derived
+        // classes can refer to them without qualifying the base.
+        template <layout_type L>
+        using layout_iterator = typename base_type::template layout_iterator<L>;
+        template <layout_type L>
+        using const_layout_iterator = typename base_type::template const_layout_iterator<L>;
+        template <layout_type L>
+        using reverse_layout_iterator = typename base_type::template reverse_layout_iterator<L>;
+        template <layout_type L>
+        using const_reverse_layout_iterator = typename base_type::template const_reverse_layout_iterator<L>;
+
+        template <class S, layout_type L>
+        using broadcast_iterator = typename base_type::template broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using const_broadcast_iterator = typename base_type::template const_broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using reverse_broadcast_iterator = typename base_type::template reverse_broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using const_reverse_broadcast_iterator = typename base_type::template const_reverse_broadcast_iterator<S, L>;
+
+        using iterator = typename base_type::iterator;
+        using const_iterator = typename base_type::const_iterator;
+        using reverse_iterator = typename base_type::reverse_iterator;
+        using const_reverse_iterator = typename base_type::const_reverse_iterator;
+
+        // Keep the const overloads from the base visible alongside the
+        // non-const overloads declared below.
+        using base_type::begin;
+        using base_type::end;
+        using base_type::rbegin;
+        using base_type::rend;
+
+        // Mutable forward / reverse iterators over the expression's shape.
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        layout_iterator<L> begin() noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        layout_iterator<L> end() noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        reverse_layout_iterator<L> rbegin() noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        reverse_layout_iterator<L> rend() noexcept;
+
+        // Mutable broadcast iterators (iterate as if the expression had
+        // the given shape).
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        broadcast_iterator<S, L> begin(const S& shape) noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        broadcast_iterator<S, L> end(const S& shape) noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        reverse_broadcast_iterator<S, L> rbegin(const S& shape) noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        reverse_broadcast_iterator<S, L> rend(const S& shape) noexcept;
+
+    private:
+
+        // Mutable counterparts of the base class' private factories.
+        template <layout_type L>
+        layout_iterator<L> get_begin(bool end_index) noexcept;
+        template <layout_type L>
+        layout_iterator<L> get_end(bool end_index) noexcept;
+
+        template <layout_type L, class S>
+        broadcast_iterator<S, L> get_begin(const S& shape, bool end_index) noexcept;
+        template <layout_type L, class S>
+        broadcast_iterator<S, L> get_end(const S& shape, bool end_index) noexcept;
+
+        template <class S>
+        stepper get_stepper_begin(const S& shape) noexcept;
+        template <class S>
+        stepper get_stepper_end(const S& shape, layout_type l) noexcept;
+
+        template <class S>
+        const_stepper get_stepper_begin(const S& shape) const noexcept;
+        template <class S>
+        const_stepper get_stepper_end(const S& shape, layout_type l) const noexcept;
+
+        derived_type& derived_cast();
+    };
+
+    /************************
+     * xcontiguous_iterable *
+     ************************/
+
+    template <class D>
+    struct xcontainer_inner_types;
+
+    namespace detail
+    {
+        // Selects the storage container's iterator family, collapsing the
+        // mutable iterators to the const ones when the container itself is
+        // const-qualified.
+        template <class T, bool is_const>
+        struct get_storage_iterator;
+
+        template <class T>
+        struct get_storage_iterator<T, true>
+        {
+            // Const container: even "iterator" must be a const_iterator.
+            using iterator = typename T::const_iterator;
+            using const_iterator = typename T::const_iterator;
+            using reverse_iterator = typename T::const_reverse_iterator;
+            using const_reverse_iterator = typename T::const_reverse_iterator;
+        };
+
+        template <class T>
+        struct get_storage_iterator<T, false>
+        {
+            using iterator = typename T::iterator;
+            using const_iterator = typename T::const_iterator;
+            using reverse_iterator = typename T::reverse_iterator;
+            using const_reverse_iterator = typename T::const_reverse_iterator;
+        };
+
+        // Computes the linear (1-D) iterator types for D. When D owns its
+        // storage (has_storage_type), the storage container's iterators are
+        // used; otherwise they are borrowed from the wrapped expression.
+        template <class D, bool has_storage_type>
+        struct linear_iterator_traits_impl;
+
+        template <class D>
+        struct linear_iterator_traits_impl<D, true>
+        {
+            using inner_types = xcontainer_inner_types<D>;
+            using storage_type = typename inner_types::storage_type;
+            using iterator_type = get_storage_iterator<storage_type, std::is_const<storage_type>::value>;
+            using linear_iterator = typename iterator_type::iterator;
+            using const_linear_iterator = typename iterator_type::const_iterator;
+            using reverse_linear_iterator = typename iterator_type::reverse_iterator;
+            using const_reverse_linear_iterator = typename iterator_type::const_reverse_iterator;
+        };
+
+        template <class D>
+        struct linear_iterator_traits_impl<D, false>
+        {
+            // No own storage: forward the wrapped expression's iterators.
+            using inner_types = xcontainer_inner_types<D>;
+            using xexpression_type = typename inner_types::xexpression_type;
+            using linear_iterator = typename xexpression_type::linear_iterator;
+            using const_linear_iterator = typename xexpression_type::const_linear_iterator;
+            using reverse_linear_iterator = typename xexpression_type::reverse_linear_iterator;
+            using const_reverse_linear_iterator = typename xexpression_type::const_reverse_linear_iterator;
+        };
+
+        template <class D>
+        using linear_iterator_traits = linear_iterator_traits_impl<D, has_storage_type<D>::value>;
+    }
+
+    /**
+     * @class xcontiguous_iterable
+     * @brief Base class for multidimensional iterable expressions with
+     * contiguous storage
+     *
+     * The xcontiguous_iterable class defines the interface for multidimensional
+     * expressions with contiguous that can be iterated.
+     *
+     * @tparam D The derived type, i.e. the inheriting class for which xcontiguous_iterable
+     *           provides the interface.
+     */
+    template <class D>
+    class xcontiguous_iterable : private xiterable<D>
+    {
+    public:
+
+        using derived_type = D;
+
+        using inner_types = xcontainer_inner_types<D>;
+
+        using iterable_base = xiterable<D>;
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+
+        static constexpr layout_type static_layout = inner_types::layout;
+
+#if defined(_MSC_VER) && _MSC_VER >= 1910
+        // Workaround for compiler bug in Visual Studio 2017 with respect to alias templates with non-type
+        // parameters.
+        template <layout_type L>
+        using layout_iterator = xiterator<typename iterable_base::stepper, typename iterable_base::inner_shape_type*, L>;
+        template <layout_type L>
+        using const_layout_iterator = xiterator<
+            typename iterable_base::const_stepper,
+            typename iterable_base::inner_shape_type*,
+            L>;
+        template <layout_type L>
+        using reverse_layout_iterator = std::reverse_iterator<layout_iterator<L>>;
+        template <layout_type L>
+        using const_reverse_layout_iterator = std::reverse_iterator<const_layout_iterator<L>>;
+#else
+        template <layout_type L>
+        using layout_iterator = typename iterable_base::template layout_iterator<L>;
+        template <layout_type L>
+        using const_layout_iterator = typename iterable_base::template const_layout_iterator<L>;
+        template <layout_type L>
+        using reverse_layout_iterator = typename iterable_base::template reverse_layout_iterator<L>;
+        template <layout_type L>
+        using const_reverse_layout_iterator = typename iterable_base::template const_reverse_layout_iterator<L>;
+#endif
+
+        template <class S, layout_type L>
+        using broadcast_iterator = typename iterable_base::template broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using const_broadcast_iterator = typename iterable_base::template const_broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using reverse_broadcast_iterator = typename iterable_base::template reverse_broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using const_reverse_broadcast_iterator = typename iterable_base::template const_reverse_broadcast_iterator<S, L>;
+
+        // Linear (1-D) iterators over the contiguous storage.
+        using linear_traits = detail::linear_iterator_traits<D>;
+        using linear_iterator = typename linear_traits::linear_iterator;
+        using const_linear_iterator = typename linear_traits::const_linear_iterator;
+        using reverse_linear_iterator = typename linear_traits::reverse_linear_iterator;
+        using const_reverse_linear_iterator = typename linear_traits::const_reverse_linear_iterator;
+
+        // When the requested traversal order L matches the container's
+        // static layout, iteration can use the fast linear iterator;
+        // otherwise fall back to the shape-aware layout iterator.
+        template <layout_type L, class It1, class It2>
+        using select_iterator_impl = std::conditional_t<L == static_layout, It1, It2>;
+
+        template <layout_type L>
+        using select_iterator = select_iterator_impl<L, linear_iterator, layout_iterator<L>>;
+        template <layout_type L>
+        using select_const_iterator = select_iterator_impl<L, const_linear_iterator, const_layout_iterator<L>>;
+        template <layout_type L>
+        using select_reverse_iterator = select_iterator_impl<L, reverse_linear_iterator, reverse_layout_iterator<L>>;
+        template <layout_type L>
+        using select_const_reverse_iterator = select_iterator_impl<
+            L,
+            const_reverse_linear_iterator,
+            const_reverse_layout_iterator<L>>;
+
+        using iterator = select_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+        using const_iterator = select_const_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+        using reverse_iterator = select_reverse_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+        using const_reverse_iterator = select_const_reverse_iterator<XTENSOR_DEFAULT_TRAVERSAL>;
+
+        // Forward / reverse iterators over the expression's own shape
+        // (linear when L matches static_layout, see select_iterator).
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_iterator<L> begin() noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_iterator<L> end() noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_const_iterator<L> begin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_const_iterator<L> end() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_const_iterator<L> cbegin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_const_iterator<L> cend() const noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_reverse_iterator<L> rbegin() noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_reverse_iterator<L> rend() noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_const_reverse_iterator<L> rbegin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_const_reverse_iterator<L> rend() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_const_reverse_iterator<L> crbegin() const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
+        select_const_reverse_iterator<L> crend() const noexcept;
+
+        // Broadcast iterators (always shape-aware, never linear).
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        broadcast_iterator<S, L> begin(const S& shape) noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        broadcast_iterator<S, L> end(const S& shape) noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_broadcast_iterator<S, L> begin(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_broadcast_iterator<S, L> end(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_broadcast_iterator<S, L> cbegin(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_broadcast_iterator<S, L> cend(const S& shape) const noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        reverse_broadcast_iterator<S, L> rbegin(const S& shape) noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        reverse_broadcast_iterator<S, L> rend(const S& shape) noexcept;
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_reverse_broadcast_iterator<S, L> rbegin(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_reverse_broadcast_iterator<S, L> rend(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_reverse_broadcast_iterator<S, L> crbegin(const S& shape) const noexcept;
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S>
+        const_reverse_broadcast_iterator<S, L> crend(const S& shape) const noexcept;
+
+    private:
+
+        derived_type& derived_cast();
+        const derived_type& derived_cast() const;
+
+        // The privately-inherited bases need access to the private
+        // derived_cast overloads.
+        friend class xiterable<D>;
+        friend class xconst_iterable<D>;
+    };
+
+    /**********************************
+     * xconst_iterable implementation *
+     **********************************/
+
+    /**
+     * @name Constant iterators
+     */
+    //@{
+    /**
+     * Returns a constant iterator to the first element of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xconst_iterable<D>::begin() const noexcept -> const_layout_iterator<L>
+    {
+        // const begin is just cbegin.
+        return this->template cbegin<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xconst_iterable<D>::end() const noexcept -> const_layout_iterator<L>
+    {
+        // const end is just cend.
+        return this->template cend<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xconst_iterable<D>::cbegin() const noexcept -> const_layout_iterator<L>
+    {
+        // false: position the iterator at the first element.
+        return this->template get_cbegin<L>(false);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xconst_iterable<D>::cend() const noexcept -> const_layout_iterator<L>
+    {
+        // true: position the iterator at the end sentinel.
+        return this->template get_cend<L>(true);
+    }
+
+    //@}
+
+    /**
+     * @name Constant reverse iterators
+     */
+    //@{
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xconst_iterable<D>::rbegin() const noexcept -> const_reverse_layout_iterator<L>
+    {
+        // const rbegin is just crbegin.
+        return this->template crbegin<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xconst_iterable<D>::rend() const noexcept -> const_reverse_layout_iterator<L>
+    {
+        // const rend is just crend.
+        return this->template crend<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xconst_iterable<D>::crbegin() const noexcept -> const_reverse_layout_iterator<L>
+    {
+        // A reverse iterator at rbegin wraps a forward iterator at end.
+        return const_reverse_layout_iterator<L>(get_cend<L>(true));
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xconst_iterable<D>::crend() const noexcept -> const_reverse_layout_iterator<L>
+    {
+        // A reverse iterator at rend wraps a forward iterator at begin.
+        return const_reverse_layout_iterator<L>(get_cbegin<L>(false));
+    }
+
+    //@}
+
+    /**
+     * @name Constant broadcast iterators
+     */
+    //@{
+    /**
+     * Returns a constant iterator to the first element of the expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xconst_iterable<D>::begin(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        // const broadcast begin is just cbegin(shape).
+        return cbegin<L>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xconst_iterable<D>::end(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        // const broadcast end is just cend(shape).
+        return cend<L>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xconst_iterable<D>::cbegin(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        // false: position the iterator at the first element.
+        return get_cbegin<L, S>(shape, false);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xconst_iterable<D>::cend(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        // true: position the iterator at the end sentinel.
+        return get_cend<L, S>(shape, true);
+    }
+
+    //@}
+
+    /**
+     * @name Constant reverse broadcast iterators
+     */
+    //@{
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xconst_iterable<D>::rbegin(const S& shape) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        // const reverse broadcast begin is just crbegin(shape).
+        return crbegin<L, S>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * reversed expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xconst_iterable<D>::rend(const S& shape) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        // const reverse broadcast end is just crend(shape).
+        return crend<L, S>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xconst_iterable<D>::crbegin(const S& shape) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        // A reverse iterator at rbegin wraps a forward iterator at end.
+        return const_reverse_broadcast_iterator<S, L>(get_cend<L, S>(shape, true));
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * reversed expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xconst_iterable<D>::crend(const S& shape) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        // A reverse iterator at rend wraps a forward iterator at begin.
+        return const_reverse_broadcast_iterator<S, L>(get_cbegin<L, S>(shape, false));
+    }
+
+    //@}
+
+    // Builds a begin-positioned layout iterator over the expression's own
+    // shape; end_index selects the internal index state of the xiterator.
+    template <class D>
+    template <layout_type L>
+    inline auto xconst_iterable<D>::get_cbegin(bool end_index) const noexcept -> const_layout_iterator<L>
+    {
+        return const_layout_iterator<L>(get_stepper_begin(get_shape()), &get_shape(), end_index);
+    }
+
+    // Builds an end-positioned layout iterator over the expression's own shape.
+    template <class D>
+    template <layout_type L>
+    inline auto xconst_iterable<D>::get_cend(bool end_index) const noexcept -> const_layout_iterator<L>
+    {
+        return const_layout_iterator<L>(get_stepper_end(get_shape(), L), &get_shape(), end_index);
+    }
+
+    // Builds a begin-positioned broadcast iterator over a caller-provided
+    // shape (the shape is copied into the iterator, not referenced).
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xconst_iterable<D>::get_cbegin(const S& shape, bool end_index) const noexcept
+        -> const_broadcast_iterator<S, L>
+    {
+        return const_broadcast_iterator<S, L>(get_stepper_begin(shape), shape, end_index);
+    }
+
+    // Builds an end-positioned broadcast iterator over a caller-provided shape.
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xconst_iterable<D>::get_cend(const S& shape, bool end_index) const noexcept
+        -> const_broadcast_iterator<S, L>
+    {
+        return const_broadcast_iterator<S, L>(get_stepper_end(shape, L), shape, end_index);
+    }
+
+    // Stepper factories: CRTP dispatch to the derived class' stepper_begin /
+    // stepper_end members.
+    template <class D>
+    template <class S>
+    inline auto xconst_iterable<D>::get_stepper_begin(const S& shape) const noexcept -> const_stepper
+    {
+        return derived_cast().stepper_begin(shape);
+    }
+
+    template <class D>
+    template <class S>
+    inline auto xconst_iterable<D>::get_stepper_end(const S& shape, layout_type l) const noexcept
+        -> const_stepper
+    {
+        return derived_cast().stepper_end(shape, l);
+    }
+
+    // Shape accessor: delegates to the derived class' shape().
+    template <class D>
+    inline auto xconst_iterable<D>::get_shape() const -> const inner_shape_type&
+    {
+        return derived_cast().shape();
+    }
+
+    // CRTP downcast: safe because D inherits from xconst_iterable<D>.
+    template <class D>
+    inline auto xconst_iterable<D>::derived_cast() const -> const derived_type&
+    {
+        return *static_cast<const derived_type*>(this);
+    }
+
+    /****************************
+     * xiterable implementation *
+     ****************************/
+
+    /**
+     * @name Iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xiterable<D>::begin() noexcept -> layout_iterator<L>
+    {
+        return get_begin<L>(false);
+    }
+
+    /**
+     * Returns an iterator to the element following the last element
+     * of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xiterable<D>::end() noexcept -> layout_iterator<L>
+    {
+        return get_end<L>(true);
+    }
+
+    //@}
+
+    /**
+     * @name Reverse iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xiterable<D>::rbegin() noexcept -> reverse_layout_iterator<L>
+    {
+        return reverse_layout_iterator<L>(get_end<L>(true));
+    }
+
+    /**
+     * Returns an iterator to the element following the last element
+     * of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xiterable<D>::rend() noexcept -> reverse_layout_iterator<L>
+    {
+        return reverse_layout_iterator<L>(get_begin<L>(false));
+    }
+
+    //@}
+
+    /**
+     * @name Broadcast iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xiterable<D>::begin(const S& shape) noexcept -> broadcast_iterator<S, L>
+    {
+        return get_begin<L, S>(shape, false);
+    }
+
+    /**
+     * Returns an iterator to the element following the last element of the
+     * expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xiterable<D>::end(const S& shape) noexcept -> broadcast_iterator<S, L>
+    {
+        return get_end<L, S>(shape, true);
+    }
+
+    //@}
+
+    /**
+     * @name Reverse broadcast iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the reversed expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xiterable<D>::rbegin(const S& shape) noexcept -> reverse_broadcast_iterator<S, L>
+    {
+        return reverse_broadcast_iterator<S, L>(get_end<L, S>(shape, true));
+    }
+
+    /**
+     * Returns an iterator to the element following the last element of the
+     * reversed expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xiterable<D>::rend(const S& shape) noexcept -> reverse_broadcast_iterator<S, L>
+    {
+        return reverse_broadcast_iterator<S, L>(get_begin<L, S>(shape, false));
+    }
+
+    //@}
+
+    // Private factories mirroring those of xconst_iterable, but producing
+    // mutable iterators. get_shape() lives in the xconst_iterable base, hence
+    // the explicit this-> qualification (dependent-name lookup in templates).
+    template <class D>
+    template <layout_type L>
+    inline auto xiterable<D>::get_begin(bool end_index) noexcept -> layout_iterator<L>
+    {
+        return layout_iterator<L>(get_stepper_begin(this->get_shape()), &(this->get_shape()), end_index);
+    }
+
+    template <class D>
+    template <layout_type L>
+    inline auto xiterable<D>::get_end(bool end_index) noexcept -> layout_iterator<L>
+    {
+        return layout_iterator<L>(get_stepper_end(this->get_shape(), L), &(this->get_shape()), end_index);
+    }
+
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xiterable<D>::get_begin(const S& shape, bool end_index) noexcept -> broadcast_iterator<S, L>
+    {
+        return broadcast_iterator<S, L>(get_stepper_begin(shape), shape, end_index);
+    }
+
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xiterable<D>::get_end(const S& shape, bool end_index) noexcept -> broadcast_iterator<S, L>
+    {
+        return broadcast_iterator<S, L>(get_stepper_end(shape, L), shape, end_index);
+    }
+
+    // Stepper creation is forwarded to the derived expression type (CRTP).
+    template <class D>
+    template <class S>
+    inline auto xiterable<D>::get_stepper_begin(const S& shape) noexcept -> stepper
+    {
+        return derived_cast().stepper_begin(shape);
+    }
+
+    template <class D>
+    template <class S>
+    inline auto xiterable<D>::get_stepper_end(const S& shape, layout_type l) noexcept -> stepper
+    {
+        return derived_cast().stepper_end(shape, l);
+    }
+
+    template <class D>
+    template <class S>
+    inline auto xiterable<D>::get_stepper_begin(const S& shape) const noexcept -> const_stepper
+    {
+        return derived_cast().stepper_begin(shape);
+    }
+
+    template <class D>
+    template <class S>
+    inline auto xiterable<D>::get_stepper_end(const S& shape, layout_type l) const noexcept -> const_stepper
+    {
+        return derived_cast().stepper_end(shape, l);
+    }
+
+    // CRTP downcast to the derived expression type.
+    template <class D>
+    inline auto xiterable<D>::derived_cast() -> derived_type&
+    {
+        return *static_cast<derived_type*>(this);
+    }
+
+    /***************************************
+     * xcontiguous_iterable implementation *
+     ***************************************/
+
+    /**
+     * @name Iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::begin() noexcept -> select_iterator<L>
+    {
+        return xtl::mpl::static_if<L == static_layout>(
+            [&](auto self)
+            {
+                return self(*this).derived_cast().linear_begin();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(*this).iterable_base::template begin<L>();
+            }
+        );
+    }
+
+    /**
+     * Returns an iterator to the element following the last element
+     * of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::end() noexcept -> select_iterator<L>
+    {
+        return xtl::mpl::static_if<L == static_layout>(
+            [&](auto self)
+            {
+                return self(*this).derived_cast().linear_end();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(*this).iterable_base::template end<L>();
+            }
+        );
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::begin() const noexcept -> select_const_iterator<L>
+    {
+        return this->template cbegin<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::end() const noexcept -> select_const_iterator<L>
+    {
+        return this->template cend<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::cbegin() const noexcept -> select_const_iterator<L>
+    {
+        return xtl::mpl::static_if<L == static_layout>(
+            [&](auto self)
+            {
+                return self(*this).derived_cast().linear_cbegin();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(*this).iterable_base::template cbegin<L>();
+            }
+        );
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::cend() const noexcept -> select_const_iterator<L>
+    {
+        return xtl::mpl::static_if<L == static_layout>(
+            [&](auto self)
+            {
+                return self(*this).derived_cast().linear_cend();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(*this).iterable_base::template cend<L>();
+            }
+        );
+    }
+
+    //@}
+
+    /**
+     * @name Reverse iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::rbegin() noexcept -> select_reverse_iterator<L>
+    {
+        return xtl::mpl::static_if<L == static_layout>(
+            [&](auto self)
+            {
+                return self(*this).derived_cast().linear_rbegin();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(*this).iterable_base::template rbegin<L>();
+            }
+        );
+    }
+
+    /**
+     * Returns an iterator to the element following the last element
+     * of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::rend() noexcept -> select_reverse_iterator<L>
+    {
+        return xtl::mpl::static_if<L == static_layout>(
+            [&](auto self)
+            {
+                return self(*this).derived_cast().linear_rend();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(*this).iterable_base::template rend<L>();
+            }
+        );
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::rbegin() const noexcept -> select_const_reverse_iterator<L>
+    {
+        return this->template crbegin<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::rend() const noexcept -> select_const_reverse_iterator<L>
+    {
+        return this->template crend<L>();
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::crbegin() const noexcept -> select_const_reverse_iterator<L>
+    {
+        return xtl::mpl::static_if<L == static_layout>(
+            [&](auto self)
+            {
+                return self(*this).derived_cast().linear_crbegin();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(*this).iterable_base::template crbegin<L>();
+            }
+        );
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element
+     * of the reversed expression.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L>
+    inline auto xcontiguous_iterable<D>::crend() const noexcept -> select_const_reverse_iterator<L>
+    {
+        return xtl::mpl::static_if<L == static_layout>(
+            [&](auto self)
+            {
+                return self(*this).derived_cast().linear_crend();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(*this).iterable_base::template crend<L>();
+            }
+        );
+    }
+
+    //@}
+
+    /**
+     * @name Broadcast iterators
+     */
+
+    /**
+     * Returns an iterator to the first element of the expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    //@{
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::begin(const S& shape) noexcept -> broadcast_iterator<S, L>
+    {
+        return iterable_base::template begin<L, S>(shape);
+    }
+
+    /**
+     * Returns an iterator to the element following the last element of the
+     * expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::end(const S& shape) noexcept -> broadcast_iterator<S, L>
+    {
+        return iterable_base::template end<L, S>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::begin(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        return iterable_base::template begin<L, S>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::end(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        return iterable_base::template end<L, S>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::cbegin(const S& shape) const noexcept
+        -> const_broadcast_iterator<S, L>
+    {
+        return iterable_base::template cbegin<L, S>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::cend(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        return iterable_base::template cend<L, S>(shape);
+    }
+
+    //@}
+
+    /**
+     * @name Reverse broadcast iterators
+     */
+    //@{
+    /**
+     * Returns an iterator to the first element of the reversed expression. The
+     * iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::rbegin(const S& shape) noexcept -> reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template rbegin<L, S>(shape);
+    }
+
+    /**
+     * Returns an iterator to the element following the last element of the
+     * reversed expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::rend(const S& shape) noexcept -> reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template rend<L, S>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::rbegin(const S& shape) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template rbegin<L, S>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * reversed expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::rend(const S& shape) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template rend<L, S>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the first element of the reversed expression.
+     * The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::crbegin(const S& shape) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template crbegin<L, S>(shape);
+    }
+
+    /**
+     * Returns a constant iterator to the element following the last element of the
+     * reversed expression. The iteration is broadcasted to the specified shape.
+     * @param shape the shape used for broadcasting
+     * @tparam S type of the \c shape parameter.
+     * @tparam L order used for the traversal. Default value is \c XTENSOR_DEFAULT_TRAVERSAL.
+     */
+    template <class D>
+    template <layout_type L, class S>
+    inline auto xcontiguous_iterable<D>::crend(const S& shape) const noexcept
+        -> const_reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template crend<L, S>(shape);
+    }
+
+    //@}
+
+    template <class D>
+    inline auto xcontiguous_iterable<D>::derived_cast() -> derived_type&
+    {
+        return *static_cast<derived_type*>(this);
+    }
+
+    template <class D>
+    inline auto xcontiguous_iterable<D>::derived_cast() const -> const derived_type&
+    {
+        return *static_cast<const derived_type*>(this);
+    }
+
+}
+
+#endif

+ 1303 - 0
3rd/numpy/include/xtensor/xiterator.hpp

@@ -0,0 +1,1303 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_ITERATOR_HPP
+#define XTENSOR_ITERATOR_HPP
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <iterator>
+#include <numeric>
+#include <vector>
+
+#include <xtl/xcompare.hpp>
+#include <xtl/xiterator_base.hpp>
+#include <xtl/xmeta_utils.hpp>
+#include <xtl/xsequence.hpp>
+
+#include "xexception.hpp"
+#include "xlayout.hpp"
+#include "xshape.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+
+    /***********************
+     * iterator meta utils *
+     ***********************/
+
+    template <class CT>
+    class xscalar;
+
+    template <bool is_const, class CT>
+    class xscalar_stepper;
+
+    namespace detail
+    {
+        template <class C>
+        struct get_stepper_iterator_impl
+        {
+            using type = typename C::container_iterator;
+        };
+
+        template <class C>
+        struct get_stepper_iterator_impl<const C>
+        {
+            using type = typename C::const_container_iterator;
+        };
+
+        template <class CT>
+        struct get_stepper_iterator_impl<xscalar<CT>>
+        {
+            using type = typename xscalar<CT>::dummy_iterator;
+        };
+
+        template <class CT>
+        struct get_stepper_iterator_impl<const xscalar<CT>>
+        {
+            using type = typename xscalar<CT>::const_dummy_iterator;
+        };
+    }
+
+    template <class C>
+    using get_stepper_iterator = typename detail::get_stepper_iterator_impl<C>::type;
+
+    /********************************
+     * xindex_type_t implementation *
+     ********************************/
+
+    namespace detail
+    {
+        template <class ST>
+        struct index_type_impl
+        {
+            using type = dynamic_shape<typename ST::value_type>;
+        };
+
+        template <class V, std::size_t L>
+        struct index_type_impl<std::array<V, L>>
+        {
+            using type = std::array<V, L>;
+        };
+
+        template <std::size_t... I>
+        struct index_type_impl<fixed_shape<I...>>
+        {
+            using type = std::array<std::size_t, sizeof...(I)>;
+        };
+    }
+
+    template <class C>
+    using xindex_type_t = typename detail::index_type_impl<C>::type;
+
+    /************
+     * xstepper *
+     ************/
+
+    template <class C>
+    class xstepper
+    {
+    public:
+
+        using storage_type = C;
+        using subiterator_type = get_stepper_iterator<C>;
+        using subiterator_traits = std::iterator_traits<subiterator_type>;
+        using value_type = typename subiterator_traits::value_type;
+        using reference = typename subiterator_traits::reference;
+        using pointer = typename subiterator_traits::pointer;
+        using difference_type = typename subiterator_traits::difference_type;
+        using size_type = typename storage_type::size_type;
+        using shape_type = typename storage_type::shape_type;
+        using simd_value_type = xt_simd::simd_type<value_type>;
+
+        template <class requested_type>
+        using simd_return_type = xt_simd::simd_return_type<value_type, requested_type>;
+
+        xstepper() = default;
+        xstepper(storage_type* c, subiterator_type it, size_type offset) noexcept;
+
+        reference operator*() const;
+
+        void step(size_type dim, size_type n = 1);
+        void step_back(size_type dim, size_type n = 1);
+        void reset(size_type dim);
+        void reset_back(size_type dim);
+
+        void to_begin();
+        void to_end(layout_type l);
+
+        template <class T>
+        simd_return_type<T> step_simd();
+
+        void step_leading();
+
+        template <class R>
+        void store_simd(const R& vec);
+
+    private:
+
+        storage_type* p_c;
+        subiterator_type m_it;
+        size_type m_offset;
+    };
+
+    // Layout-specific algorithms that advance or retreat a stepper together
+    // with its tracked multi-dimensional index across a given shape.
+    template <layout_type L>
+    struct stepper_tools
+    {
+        // For performance reasons, increment_stepper and decrement_stepper are
+        // specialized for the case where n=1, which underlies operator++ and
+        // operator-- on xiterators.
+
+        template <class S, class IT, class ST>
+        static void increment_stepper(S& stepper, IT& index, const ST& shape);
+
+        template <class S, class IT, class ST>
+        static void decrement_stepper(S& stepper, IT& index, const ST& shape);
+
+        template <class S, class IT, class ST>
+        static void increment_stepper(S& stepper, IT& index, const ST& shape, typename S::size_type n);
+
+        template <class S, class IT, class ST>
+        static void decrement_stepper(S& stepper, IT& index, const ST& shape, typename S::size_type n);
+    };
+
+    /********************
+     * xindexed_stepper *
+     ********************/
+
+    template <class E, bool is_const>
+    class xindexed_stepper
+    {
+    public:
+
+        using self_type = xindexed_stepper<E, is_const>;
+        using xexpression_type = std::conditional_t<is_const, const E, E>;
+
+        using value_type = typename xexpression_type::value_type;
+        using reference = std::
+            conditional_t<is_const, typename xexpression_type::const_reference, typename xexpression_type::reference>;
+        using pointer = std::
+            conditional_t<is_const, typename xexpression_type::const_pointer, typename xexpression_type::pointer>;
+        using size_type = typename xexpression_type::size_type;
+        using difference_type = typename xexpression_type::difference_type;
+
+        using shape_type = typename xexpression_type::shape_type;
+        using index_type = xindex_type_t<shape_type>;
+
+        xindexed_stepper() = default;
+        xindexed_stepper(xexpression_type* e, size_type offset, bool end = false) noexcept;
+
+        reference operator*() const;
+
+        void step(size_type dim, size_type n = 1);
+        void step_back(size_type dim, size_type n = 1);
+        void reset(size_type dim);
+        void reset_back(size_type dim);
+
+        void to_begin();
+        void to_end(layout_type l);
+
+    private:
+
+        xexpression_type* p_e;
+        index_type m_index;
+        size_type m_offset;
+    };
+
+    // Type trait detecting xindexed_stepper, with the matching enable_if
+    // helpers used to select overloads for indexed vs. linear steppers.
+    template <class T>
+    struct is_indexed_stepper
+    {
+        static const bool value = false;
+    };
+
+    template <class T, bool B>
+    struct is_indexed_stepper<xindexed_stepper<T, B>>
+    {
+        static const bool value = true;
+    };
+
+    // SFINAE helper: enables an overload only for indexed steppers.
+    template <class T, class R = T>
+    struct enable_indexed_stepper : std::enable_if<is_indexed_stepper<T>::value, R>
+    {
+    };
+
+    template <class T, class R = T>
+    using enable_indexed_stepper_t = typename enable_indexed_stepper<T, R>::type;
+
+    // SFINAE helper: enables an overload only for non-indexed steppers.
+    template <class T, class R = T>
+    struct disable_indexed_stepper : std::enable_if<!is_indexed_stepper<T>::value, R>
+    {
+    };
+
+    template <class T, class R = T>
+    using disable_indexed_stepper_t = typename disable_indexed_stepper<T, R>::type;
+
+    /*************
+     * xiterator *
+     *************/
+
+    namespace detail
+    {
+        template <class S>
+        class shape_storage
+        {
+        public:
+
+            using shape_type = S;
+            using param_type = const S&;
+
+            shape_storage() = default;
+            shape_storage(param_type shape);
+            const S& shape() const;
+
+        private:
+
+            S m_shape;
+        };
+
+        template <class S>
+        class shape_storage<S*>
+        {
+        public:
+
+            using shape_type = S;
+            using param_type = const S*;
+
+            shape_storage(param_type shape = 0);
+            const S& shape() const;
+
+        private:
+
+            const S* p_shape;
+        };
+
+        template <layout_type L>
+        struct LAYOUT_FORBIDEN_FOR_XITERATOR;
+    }
+
+    // Random-access iterator that drives a stepper over a shape in layout
+    // order L. The shape is either stored by value or observed through a
+    // pointer, depending on S (see detail::shape_storage).
+    template <class St, class S, layout_type L>
+    class xiterator : public xtl::xrandom_access_iterator_base<
+                          xiterator<St, S, L>,
+                          typename St::value_type,
+                          typename St::difference_type,
+                          typename St::pointer,
+                          typename St::reference>,
+                      private detail::shape_storage<S>
+    {
+    public:
+
+        using self_type = xiterator<St, S, L>;
+
+        using stepper_type = St;
+        using value_type = typename stepper_type::value_type;
+        using reference = typename stepper_type::reference;
+        using pointer = typename stepper_type::pointer;
+        using difference_type = typename stepper_type::difference_type;
+        using size_type = typename stepper_type::size_type;
+        using iterator_category = std::random_access_iterator_tag;
+
+        using private_base = detail::shape_storage<S>;
+        using shape_type = typename private_base::shape_type;
+        using shape_param_type = typename private_base::param_type;
+        using index_type = xindex_type_t<shape_type>;
+
+        xiterator() = default;
+
+        // end_index means either reverse_iterator && !end or !reverse_iterator && end
+        xiterator(St st, shape_param_type shape, bool end_index);
+
+        self_type& operator++();
+        self_type& operator--();
+
+        self_type& operator+=(difference_type n);
+        self_type& operator-=(difference_type n);
+
+        difference_type operator-(const self_type& rhs) const;
+
+        reference operator*() const;
+        pointer operator->() const;
+
+        bool equal(const xiterator& rhs) const;
+        bool less_than(const xiterator& rhs) const;
+
+    private:
+
+        // Stepper positioned at the current element.
+        stepper_type m_st;
+        // Current multi-dimensional index.
+        index_type m_index;
+        // Flat position used for distance computation and ordering.
+        difference_type m_linear_index;
+
+        // Compile-time guard: instantiation fails here for layouts that
+        // xiterator does not support (see LAYOUT_FORBIDEN_FOR_XITERATOR).
+        using checking_type = typename detail::LAYOUT_FORBIDEN_FOR_XITERATOR<L>::type;
+    };
+
+    // Comparison operators implemented via equal() / less_than().
+    template <class St, class S, layout_type L>
+    bool operator==(const xiterator<St, S, L>& lhs, const xiterator<St, S, L>& rhs);
+
+    template <class St, class S, layout_type L>
+    bool operator<(const xiterator<St, S, L>& lhs, const xiterator<St, S, L>& rhs);
+
+    // xiterator walks a (possibly strided) shape, so it never qualifies as
+    // a contiguous container iterator.
+    template <class St, class S, layout_type L>
+    struct is_contiguous_container<xiterator<St, S, L>> : std::false_type
+    {
+    };
+
+    /*********************
+     * xbounded_iterator *
+     *********************/
+
+    // Random access iterator pairing a value iterator (It) with a parallel
+    // bound iterator (BIt); both advance in lockstep. Dereferencing yields
+    // the value clamped to [.., bound - 1] (see operator* implementation).
+    // Note operator* returns by value (value_type), not by reference, since
+    // the clamped result may be a computed temporary.
+    template <class It, class BIt>
+    class xbounded_iterator : public xtl::xrandom_access_iterator_base<
+                                  xbounded_iterator<It, BIt>,
+                                  typename std::iterator_traits<It>::value_type,
+                                  typename std::iterator_traits<It>::difference_type,
+                                  typename std::iterator_traits<It>::pointer,
+                                  typename std::iterator_traits<It>::reference>
+    {
+    public:
+
+        using self_type = xbounded_iterator<It, BIt>;
+
+        using subiterator_type = It;
+        using bound_iterator_type = BIt;
+        using value_type = typename std::iterator_traits<It>::value_type;
+        using reference = typename std::iterator_traits<It>::reference;
+        using pointer = typename std::iterator_traits<It>::pointer;
+        using difference_type = typename std::iterator_traits<It>::difference_type;
+        using iterator_category = std::random_access_iterator_tag;
+
+        xbounded_iterator() = default;
+        xbounded_iterator(It it, BIt bound_it);
+
+        self_type& operator++();
+        self_type& operator--();
+
+        self_type& operator+=(difference_type n);
+        self_type& operator-=(difference_type n);
+
+        difference_type operator-(const self_type& rhs) const;
+
+        value_type operator*() const;
+
+        bool equal(const self_type& rhs) const;
+        bool less_than(const self_type& rhs) const;
+
+    private:
+
+        subiterator_type m_it;
+        bound_iterator_type m_bound_it;
+    };
+
+    template <class It, class BIt>
+    bool operator==(const xbounded_iterator<It, BIt>& lhs, const xbounded_iterator<It, BIt>& rhs);
+
+    template <class It, class BIt>
+    bool operator<(const xbounded_iterator<It, BIt>& lhs, const xbounded_iterator<It, BIt>& rhs);
+
+    /*****************************
+     * linear_begin / linear_end *
+     *****************************/
+
+    namespace detail
+    {
+        // Detection idiom: true_type iff C exposes linear_cbegin(), i.e.
+        // provides 1-D "linear" iterators over its underlying storage.
+        template <class C, class = void_t<>>
+        struct has_linear_iterator : std::false_type
+        {
+        };
+
+        template <class C>
+        struct has_linear_iterator<C, void_t<decltype(std::declval<C>().linear_cbegin())>> : std::true_type
+        {
+        };
+    }
+
+    // linear_begin / linear_end: return the container's linear (flat storage)
+    // iterators when it has them, otherwise fall back to the regular
+    // begin()/end(). The const overloads use the c-prefixed variants.
+    // Dispatch happens at compile time via static_if on has_linear_iterator.
+    template <class C>
+    XTENSOR_CONSTEXPR_RETURN auto linear_begin(C& c) noexcept
+    {
+        return xtl::mpl::static_if<detail::has_linear_iterator<C>::value>(
+            [&](auto self)
+            {
+                return self(c).linear_begin();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(c).begin();
+            }
+        );
+    }
+
+    template <class C>
+    XTENSOR_CONSTEXPR_RETURN auto linear_end(C& c) noexcept
+    {
+        return xtl::mpl::static_if<detail::has_linear_iterator<C>::value>(
+            [&](auto self)
+            {
+                return self(c).linear_end();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(c).end();
+            }
+        );
+    }
+
+    template <class C>
+    XTENSOR_CONSTEXPR_RETURN auto linear_begin(const C& c) noexcept
+    {
+        return xtl::mpl::static_if<detail::has_linear_iterator<C>::value>(
+            [&](auto self)
+            {
+                return self(c).linear_cbegin();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(c).cbegin();
+            }
+        );
+    }
+
+    template <class C>
+    XTENSOR_CONSTEXPR_RETURN auto linear_end(const C& c) noexcept
+    {
+        return xtl::mpl::static_if<detail::has_linear_iterator<C>::value>(
+            [&](auto self)
+            {
+                return self(c).linear_cend();
+            },
+            /*else*/
+            [&](auto self)
+            {
+                return self(c).cend();
+            }
+        );
+    }
+
+    /***************************
+     * xstepper implementation *
+     ***************************/
+
+    // xstepper: moves an iterator over a strided container C one dimension at
+    // a time. Dimensions below m_offset are "broadcast" dimensions and are
+    // ignored by step/step_back/reset (the dim >= m_offset guards below).
+    template <class C>
+    inline xstepper<C>::xstepper(storage_type* c, subiterator_type it, size_type offset) noexcept
+        : p_c(c)
+        , m_it(it)
+        , m_offset(offset)
+    {
+    }
+
+    template <class C>
+    inline auto xstepper<C>::operator*() const -> reference
+    {
+        return *m_it;
+    }
+
+    // Advance n positions along dimension dim using the container's strides.
+    template <class C>
+    inline void xstepper<C>::step(size_type dim, size_type n)
+    {
+        if (dim >= m_offset)
+        {
+            // Cast n to the strides' value type first so signed strides
+            // (negative steps) multiply correctly before the final cast.
+            using strides_value_type = typename std::decay_t<decltype(p_c->strides())>::value_type;
+            m_it += difference_type(static_cast<strides_value_type>(n) * p_c->strides()[dim - m_offset]);
+        }
+    }
+
+    // Move n positions backwards along dimension dim.
+    template <class C>
+    inline void xstepper<C>::step_back(size_type dim, size_type n)
+    {
+        if (dim >= m_offset)
+        {
+            using strides_value_type = typename std::decay_t<decltype(p_c->strides())>::value_type;
+            m_it -= difference_type(static_cast<strides_value_type>(n) * p_c->strides()[dim - m_offset]);
+        }
+    }
+
+    // Rewind dimension dim to its first position (undo a full traversal of
+    // that dimension) using the precomputed backstrides.
+    template <class C>
+    inline void xstepper<C>::reset(size_type dim)
+    {
+        if (dim >= m_offset)
+        {
+            m_it -= difference_type(p_c->backstrides()[dim - m_offset]);
+        }
+    }
+
+    // Fast-forward dimension dim to its last position.
+    template <class C>
+    inline void xstepper<C>::reset_back(size_type dim)
+    {
+        if (dim >= m_offset)
+        {
+            m_it += difference_type(p_c->backstrides()[dim - m_offset]);
+        }
+    }
+
+    template <class C>
+    inline void xstepper<C>::to_begin()
+    {
+        m_it = p_c->data_xbegin();
+    }
+
+    template <class C>
+    inline void xstepper<C>::to_end(layout_type l)
+    {
+        m_it = p_c->data_xend(l, m_offset);
+    }
+
+    namespace detail
+    {
+        // Loads a SIMD register from the iterator's current position.
+        template <class It>
+        struct step_simd_invoker
+        {
+            template <class R>
+            static R apply(const It& it)
+            {
+                R reg;
+                return reg.load_unaligned(&(*it));
+            }
+        };
+
+        // Scalar steppers always yield the same value: broadcast it into the
+        // register instead of loading from memory.
+        template <bool is_const, class T, class S, layout_type L>
+        struct step_simd_invoker<xiterator<xscalar_stepper<is_const, T>, S, L>>
+        {
+            template <class R>
+            static R apply(const xiterator<xscalar_stepper<is_const, T>, S, L>& it)
+            {
+                return R(*it);
+            }
+        };
+    }
+
+    // Load one SIMD batch at the current position and advance by its width.
+    template <class C>
+    template <class T>
+    inline auto xstepper<C>::step_simd() -> simd_return_type<T>
+    {
+        using simd_type = simd_return_type<T>;
+        simd_type reg = detail::step_simd_invoker<subiterator_type>::template apply<simd_type>(m_it);
+        m_it += xt_simd::revert_simd_traits<simd_type>::size;
+        return reg;
+    }
+
+    // Store one SIMD batch at the current position and advance by its width.
+    template <class C>
+    template <class R>
+    inline void xstepper<C>::store_simd(const R& vec)
+    {
+        vec.store_unaligned(&(*m_it));
+        m_it += xt_simd::revert_simd_traits<R>::size;
+    }
+
+    template <class C>
+    void xstepper<C>::step_leading()
+    {
+        ++m_it;
+    }
+
+    // Row-major odometer increment by one: bump the last dimension; on
+    // overflow reset it to 0, carry into the next-outer dimension, and undo
+    // the stepper's traversal of that dimension with reset(). When every
+    // dimension overflows, the index is set to the past-the-end sentinel
+    // (shape - 1 everywhere except the last dimension, which is set to
+    // shape[last]) and the stepper is moved to its end position.
+    template <>
+    template <class S, class IT, class ST>
+    void stepper_tools<layout_type::row_major>::increment_stepper(S& stepper, IT& index, const ST& shape)
+    {
+        using size_type = typename S::size_type;
+        const size_type size = index.size();
+        size_type i = size;
+        while (i != 0)
+        {
+            --i;
+            if (index[i] != shape[i] - 1)
+            {
+                ++index[i];
+                stepper.step(i);
+                return;
+            }
+            else
+            {
+                index[i] = 0;
+                if (i != 0)
+                {
+                    stepper.reset(i);
+                }
+            }
+        }
+        if (i == 0)
+        {
+            if (size != size_type(0))
+            {
+                std::transform(
+                    shape.cbegin(),
+                    shape.cend() - 1,
+                    index.begin(),
+                    [](const auto& v)
+                    {
+                        return v - 1;
+                    }
+                );
+                index[size - 1] = shape[size - 1];
+            }
+            stepper.to_end(layout_type::row_major);
+        }
+    }
+
+    // Row-major odometer increment by n: steps as far as possible along the
+    // leading (last) dimension in one shot, otherwise carries one unit at a
+    // time into outer dimensions; after each successful step the scan
+    // restarts from the last dimension (i = index.size()). If n is not
+    // exhausted when all dimensions overflow, the iterator lands on the
+    // past-the-end sentinel, same as the single-step overload.
+    template <>
+    template <class S, class IT, class ST>
+    void stepper_tools<layout_type::row_major>::increment_stepper(
+        S& stepper,
+        IT& index,
+        const ST& shape,
+        typename S::size_type n
+    )
+    {
+        using size_type = typename S::size_type;
+        const size_type size = index.size();
+        const size_type leading_i = size - 1;
+        size_type i = size;
+        while (i != 0 && n != 0)
+        {
+            --i;
+            size_type inc = (i == leading_i) ? n : 1;
+            // xtl::cmp_less: safe comparison even with mixed signedness.
+            if (xtl::cmp_less(index[i] + inc, shape[i]))
+            {
+                index[i] += inc;
+                stepper.step(i, inc);
+                n -= inc;
+                if (i != leading_i || index.size() == 1)
+                {
+                    i = index.size();
+                }
+            }
+            else
+            {
+                if (i == leading_i)
+                {
+                    // Consume what remains of the leading dimension before
+                    // carrying into the outer ones.
+                    size_type off = shape[i] - index[i] - 1;
+                    stepper.step(i, off);
+                    n -= off;
+                }
+                index[i] = 0;
+                if (i != 0)
+                {
+                    stepper.reset(i);
+                }
+            }
+        }
+        if (i == 0 && n != 0)
+        {
+            if (size != size_type(0))
+            {
+                std::transform(
+                    shape.cbegin(),
+                    shape.cend() - 1,
+                    index.begin(),
+                    [](const auto& v)
+                    {
+                        return v - 1;
+                    }
+                );
+                index[leading_i] = shape[leading_i];
+            }
+            stepper.to_end(layout_type::row_major);
+        }
+    }
+
+    // Row-major odometer decrement by one: mirror of increment_stepper.
+    // A dimension that underflows wraps to shape - 1 and borrows from the
+    // next-outer dimension; full underflow rewinds the stepper to begin.
+    template <>
+    template <class S, class IT, class ST>
+    void stepper_tools<layout_type::row_major>::decrement_stepper(S& stepper, IT& index, const ST& shape)
+    {
+        using size_type = typename S::size_type;
+        size_type i = index.size();
+        while (i != 0)
+        {
+            --i;
+            if (index[i] != 0)
+            {
+                --index[i];
+                stepper.step_back(i);
+                return;
+            }
+            else
+            {
+                index[i] = shape[i] - 1;
+                if (i != 0)
+                {
+                    stepper.reset_back(i);
+                }
+            }
+        }
+        if (i == 0)
+        {
+            stepper.to_begin();
+        }
+    }
+
+    // Row-major odometer decrement by n: mirror of the n-step increment.
+    // Steps as far as possible along the leading (last) dimension, borrows
+    // into outer dimensions on underflow, and rewinds to begin if n is not
+    // exhausted after all dimensions underflow.
+    template <>
+    template <class S, class IT, class ST>
+    void stepper_tools<layout_type::row_major>::decrement_stepper(
+        S& stepper,
+        IT& index,
+        const ST& shape,
+        typename S::size_type n
+    )
+    {
+        using size_type = typename S::size_type;
+        size_type i = index.size();
+        size_type leading_i = index.size() - 1;
+        while (i != 0 && n != 0)
+        {
+            --i;
+            size_type inc = (i == leading_i) ? n : 1;
+            // xtl::cmp_greater_equal: safe comparison with mixed signedness.
+            if (xtl::cmp_greater_equal(index[i], inc))
+            {
+                index[i] -= inc;
+                stepper.step_back(i, inc);
+                n -= inc;
+                if (i != leading_i || index.size() == 1)
+                {
+                    i = index.size();
+                }
+            }
+            else
+            {
+                if (i == leading_i)
+                {
+                    // Consume the remainder of the leading dimension first.
+                    size_type off = index[i];
+                    stepper.step_back(i, off);
+                    n -= off;
+                }
+                index[i] = shape[i] - 1;
+                if (i != 0)
+                {
+                    stepper.reset_back(i);
+                }
+            }
+        }
+        if (i == 0 && n != 0)
+        {
+            stepper.to_begin();
+        }
+    }
+
+    // Column-major odometer increment by one: same carry scheme as the
+    // row-major version, but the first dimension is the fastest-varying one
+    // and the scan runs from dimension 0 upwards. The past-the-end sentinel
+    // sets dimension 0 to shape[0] and all others to shape - 1.
+    template <>
+    template <class S, class IT, class ST>
+    void stepper_tools<layout_type::column_major>::increment_stepper(S& stepper, IT& index, const ST& shape)
+    {
+        using size_type = typename S::size_type;
+        const size_type size = index.size();
+        size_type i = 0;
+        while (i != size)
+        {
+            if (index[i] != shape[i] - 1)
+            {
+                ++index[i];
+                stepper.step(i);
+                return;
+            }
+            else
+            {
+                index[i] = 0;
+                if (i != size - 1)
+                {
+                    stepper.reset(i);
+                }
+            }
+            ++i;
+        }
+        if (i == size)
+        {
+            if (size != size_type(0))
+            {
+                std::transform(
+                    shape.cbegin() + 1,
+                    shape.cend(),
+                    index.begin() + 1,
+                    [](const auto& v)
+                    {
+                        return v - 1;
+                    }
+                );
+                index[0] = shape[0];
+            }
+            stepper.to_end(layout_type::column_major);
+        }
+    }
+
+    // Column-major odometer increment by n: mirror of the row-major n-step
+    // overload with dimension 0 as the leading (fastest) dimension. After a
+    // successful step the scan restarts from dimension 0; if n is not
+    // exhausted after all dimensions overflow, the iterator lands on the
+    // column-major past-the-end sentinel.
+    template <>
+    template <class S, class IT, class ST>
+    void stepper_tools<layout_type::column_major>::increment_stepper(
+        S& stepper,
+        IT& index,
+        const ST& shape,
+        typename S::size_type n
+    )
+    {
+        using size_type = typename S::size_type;
+        const size_type size = index.size();
+        const size_type leading_i = 0;
+        size_type i = 0;
+        while (i != size && n != 0)
+        {
+            size_type inc = (i == leading_i) ? n : 1;
+            // xtl::cmp_less for signedness-safe comparison, consistent with
+            // the row-major overload of this function.
+            if (xtl::cmp_less(index[i] + inc, shape[i]))
+            {
+                index[i] += inc;
+                stepper.step(i, inc);
+                n -= inc;
+                if (i != leading_i || size == 1)
+                {
+                    i = 0;
+                    continue;
+                }
+            }
+            else
+            {
+                if (i == leading_i)
+                {
+                    // Consume what remains of the leading dimension before
+                    // carrying into the outer ones.
+                    size_type off = shape[i] - index[i] - 1;
+                    stepper.step(i, off);
+                    n -= off;
+                }
+                index[i] = 0;
+                if (i != size - 1)
+                {
+                    stepper.reset(i);
+                }
+            }
+            ++i;
+        }
+        if (i == size && n != 0)
+        {
+            if (size != size_type(0))
+            {
+                std::transform(
+                    shape.cbegin() + 1,
+                    shape.cend(),
+                    index.begin() + 1,
+                    [](const auto& v)
+                    {
+                        return v - 1;
+                    }
+                );
+                index[leading_i] = shape[leading_i];
+            }
+            stepper.to_end(layout_type::column_major);
+        }
+    }
+
+    // Column-major odometer decrement by one: dimension 0 varies fastest; a
+    // dimension that underflows wraps to shape - 1 and borrows from the next
+    // dimension. Full underflow rewinds the stepper to begin.
+    template <>
+    template <class S, class IT, class ST>
+    void stepper_tools<layout_type::column_major>::decrement_stepper(S& stepper, IT& index, const ST& shape)
+    {
+        using size_type = typename S::size_type;
+        size_type size = index.size();
+        size_type i = 0;
+        while (i != size)
+        {
+            if (index[i] != 0)
+            {
+                --index[i];
+                stepper.step_back(i);
+                return;
+            }
+            else
+            {
+                index[i] = shape[i] - 1;
+                if (i != size - 1)
+                {
+                    stepper.reset_back(i);
+                }
+            }
+            ++i;
+        }
+        if (i == size)
+        {
+            stepper.to_begin();
+        }
+    }
+
+    // Column-major odometer decrement by n: mirror of the row-major n-step
+    // decrement with dimension 0 as the leading (fastest) dimension. Rewinds
+    // the stepper to begin if n is not exhausted after all dimensions
+    // underflow.
+    template <>
+    template <class S, class IT, class ST>
+    void stepper_tools<layout_type::column_major>::decrement_stepper(
+        S& stepper,
+        IT& index,
+        const ST& shape,
+        typename S::size_type n
+    )
+    {
+        using size_type = typename S::size_type;
+        size_type size = index.size();
+        size_type i = 0;
+        size_type leading_i = 0;
+        while (i != size && n != 0)
+        {
+            size_type inc = (i == leading_i) ? n : 1;
+            // xtl::cmp_greater_equal for signedness-safe comparison,
+            // consistent with the row-major overload of this function.
+            if (xtl::cmp_greater_equal(index[i], inc))
+            {
+                index[i] -= inc;
+                stepper.step_back(i, inc);
+                n -= inc;
+                if (i != leading_i || index.size() == 1)
+                {
+                    i = 0;
+                    continue;
+                }
+            }
+            else
+            {
+                if (i == leading_i)
+                {
+                    // Consume the remainder of the leading dimension first.
+                    size_type off = index[i];
+                    stepper.step_back(i, off);
+                    n -= off;
+                }
+                index[i] = shape[i] - 1;
+                if (i != size - 1)
+                {
+                    stepper.reset_back(i);
+                }
+            }
+            ++i;
+        }
+        if (i == size && n != 0)
+        {
+            stepper.to_begin();
+        }
+    }
+
+    /***********************************
+     * xindexed_stepper implementation *
+     ***********************************/
+
+    // xindexed_stepper: a stepper over expressions that only support
+    // index-based element access. Instead of moving a raw iterator it
+    // maintains an explicit index and dereferences via e->element().
+    // As with xstepper, dimensions below m_offset are ignored.
+    template <class C, bool is_const>
+    inline xindexed_stepper<C, is_const>::xindexed_stepper(xexpression_type* e, size_type offset, bool end) noexcept
+        : p_e(e)
+        , m_index(xtl::make_sequence<index_type>(e->shape().size(), size_type(0)))
+        , m_offset(offset)
+    {
+        if (end)
+        {
+            // Note: the layout here doesn't matter (unused) but using default traversal looks more "correct".
+            to_end(XTENSOR_DEFAULT_TRAVERSAL);
+        }
+    }
+
+    template <class C, bool is_const>
+    inline auto xindexed_stepper<C, is_const>::operator*() const -> reference
+    {
+        return p_e->element(m_index.cbegin(), m_index.cend());
+    }
+
+    template <class C, bool is_const>
+    inline void xindexed_stepper<C, is_const>::step(size_type dim, size_type n)
+    {
+        if (dim >= m_offset)
+        {
+            m_index[dim - m_offset] += static_cast<typename index_type::value_type>(n);
+        }
+    }
+
+    template <class C, bool is_const>
+    inline void xindexed_stepper<C, is_const>::step_back(size_type dim, size_type n)
+    {
+        if (dim >= m_offset)
+        {
+            m_index[dim - m_offset] -= static_cast<typename index_type::value_type>(n);
+        }
+    }
+
+    // Rewind dimension dim to its first position.
+    template <class C, bool is_const>
+    inline void xindexed_stepper<C, is_const>::reset(size_type dim)
+    {
+        if (dim >= m_offset)
+        {
+            m_index[dim - m_offset] = 0;
+        }
+    }
+
+    // Fast-forward dimension dim to its last position.
+    template <class C, bool is_const>
+    inline void xindexed_stepper<C, is_const>::reset_back(size_type dim)
+    {
+        if (dim >= m_offset)
+        {
+            m_index[dim - m_offset] = p_e->shape()[dim - m_offset] - 1;
+        }
+    }
+
+    template <class C, bool is_const>
+    inline void xindexed_stepper<C, is_const>::to_begin()
+    {
+        std::fill(m_index.begin(), m_index.end(), size_type(0));
+    }
+
+    // Set the past-the-end sentinel: shape - 1 everywhere, except the
+    // layout-dependent leading dimension, which is set to its shape value.
+    template <class C, bool is_const>
+    inline void xindexed_stepper<C, is_const>::to_end(layout_type l)
+    {
+        const auto& shape = p_e->shape();
+        std::transform(
+            shape.cbegin(),
+            shape.cend(),
+            m_index.begin(),
+            [](const auto& v)
+            {
+                return v - 1;
+            }
+        );
+
+        size_type l_dim = (l == layout_type::row_major) ? shape.size() - 1 : 0;
+        m_index[l_dim] = shape[l_dim];
+    }
+
+    /****************************
+     * xiterator implementation *
+     ****************************/
+
+    namespace detail
+    {
+        // By-value storage: copies (or moves) the shape into the iterator.
+        template <class S>
+        inline shape_storage<S>::shape_storage(param_type shape)
+            : m_shape(shape)
+        {
+        }
+
+        template <class S>
+        inline const S& shape_storage<S>::shape() const
+        {
+            return m_shape;
+        }
+
+        // Pointer storage: keeps a non-owning pointer to an external shape.
+        template <class S>
+        inline shape_storage<S*>::shape_storage(param_type shape)
+            : p_shape(shape)
+        {
+        }
+
+        template <class S>
+        inline const S& shape_storage<S*>::shape() const
+        {
+            return *p_shape;
+        }
+
+        // Only these two specializations exist; any other layout_type makes
+        // xiterator's checking_type alias fail to compile.
+        template <>
+        struct LAYOUT_FORBIDEN_FOR_XITERATOR<layout_type::row_major>
+        {
+            using type = int;
+        };
+
+        template <>
+        struct LAYOUT_FORBIDEN_FOR_XITERATOR<layout_type::column_major>
+        {
+            using type = int;
+        };
+    }
+
+    // Construct an xiterator at the beginning (end_index == false: zero
+    // index, zero linear index) or at the past-the-end position
+    // (end_index == true: every dimension except the layout's leading one set
+    // to shape - 1, the leading one set to its shape value, and the linear
+    // index set to the total number of elements).
+    template <class St, class S, layout_type L>
+    inline xiterator<St, S, L>::xiterator(St st, shape_param_type shape, bool end_index)
+        : private_base(shape)
+        , m_st(st)
+        , m_index(
+              end_index ? xtl::forward_sequence<index_type, const shape_type&>(this->shape())
+                        : xtl::make_sequence<index_type>(this->shape().size(), size_type(0))
+          )
+        , m_linear_index(0)
+    {
+        // end_index means either reverse_iterator && !end or !reverse_iterator && end
+        if (end_index)
+        {
+            if (m_index.size() != size_type(0))
+            {
+                // Turn the copied shape into the sentinel index: decrement
+                // every dimension except the leading one (last for row-major,
+                // first for column-major), which keeps its shape value.
+                auto iter_begin = (L == layout_type::row_major) ? m_index.begin() : m_index.begin() + 1;
+                auto iter_end = (L == layout_type::row_major) ? m_index.end() - 1 : m_index.end();
+                std::transform(
+                    iter_begin,
+                    iter_end,
+                    iter_begin,
+                    [](const auto& v)
+                    {
+                        return v - 1;
+                    }
+                );
+            }
+            // Linear end index == product of the shape == number of elements.
+            m_linear_index = difference_type(std::accumulate(
+                this->shape().cbegin(),
+                this->shape().cend(),
+                size_type(1),
+                std::multiplies<size_type>()
+            ));
+        }
+    }
+
+    template <class St, class S, layout_type L>
+    inline auto xiterator<St, S, L>::operator++() -> self_type&
+    {
+        stepper_tools<L>::increment_stepper(m_st, m_index, this->shape());
+        ++m_linear_index;
+        return *this;
+    }
+
+    template <class St, class S, layout_type L>
+    inline auto xiterator<St, S, L>::operator--() -> self_type&
+    {
+        stepper_tools<L>::decrement_stepper(m_st, m_index, this->shape());
+        --m_linear_index;
+        return *this;
+    }
+
+    // += / -= dispatch to the n-step increment/decrement helpers; a negative
+    // count is forwarded to the opposite direction.
+    template <class St, class S, layout_type L>
+    inline auto xiterator<St, S, L>::operator+=(difference_type n) -> self_type&
+    {
+        if (n >= 0)
+        {
+            stepper_tools<L>::increment_stepper(m_st, m_index, this->shape(), static_cast<size_type>(n));
+        }
+        else
+        {
+            stepper_tools<L>::decrement_stepper(m_st, m_index, this->shape(), static_cast<size_type>(-n));
+        }
+        m_linear_index += n;
+        return *this;
+    }
+
+    template <class St, class S, layout_type L>
+    inline auto xiterator<St, S, L>::operator-=(difference_type n) -> self_type&
+    {
+        if (n >= 0)
+        {
+            stepper_tools<L>::decrement_stepper(m_st, m_index, this->shape(), static_cast<size_type>(n));
+        }
+        else
+        {
+            stepper_tools<L>::increment_stepper(m_st, m_index, this->shape(), static_cast<size_type>(-n));
+        }
+        m_linear_index -= n;
+        return *this;
+    }
+
+    // Distance is the difference of linear indices: O(1).
+    template <class St, class S, layout_type L>
+    inline auto xiterator<St, S, L>::operator-(const self_type& rhs) const -> difference_type
+    {
+        return m_linear_index - rhs.m_linear_index;
+    }
+
+    template <class St, class S, layout_type L>
+    inline auto xiterator<St, S, L>::operator*() const -> reference
+    {
+        return *m_st;
+    }
+
+    template <class St, class S, layout_type L>
+    inline auto xiterator<St, S, L>::operator->() const -> pointer
+    {
+        return &(*m_st);
+    }
+
+    // Comparisons only consider the linear index; comparing iterators over
+    // different shapes is undefined (asserted in debug builds).
+    template <class St, class S, layout_type L>
+    inline bool xiterator<St, S, L>::equal(const xiterator& rhs) const
+    {
+        XTENSOR_ASSERT(this->shape() == rhs.shape());
+        return m_linear_index == rhs.m_linear_index;
+    }
+
+    template <class St, class S, layout_type L>
+    inline bool xiterator<St, S, L>::less_than(const xiterator& rhs) const
+    {
+        XTENSOR_ASSERT(this->shape() == rhs.shape());
+        return m_linear_index < rhs.m_linear_index;
+    }
+
+    template <class St, class S, layout_type L>
+    inline bool operator==(const xiterator<St, S, L>& lhs, const xiterator<St, S, L>& rhs)
+    {
+        return lhs.equal(rhs);
+    }
+
+    template <class St, class S, layout_type L>
+    bool operator<(const xiterator<St, S, L>& lhs, const xiterator<St, S, L>& rhs)
+    {
+        return lhs.less_than(rhs);
+    }
+
+    /************************************
+     * xbounded_iterator implementation *
+     ************************************/
+
+    // xbounded_iterator implementation: the value iterator and the bound
+    // iterator advance strictly in lockstep in every mutating operation.
+    template <class It, class BIt>
+    xbounded_iterator<It, BIt>::xbounded_iterator(It it, BIt bound_it)
+        : m_it(it)
+        , m_bound_it(bound_it)
+    {
+    }
+
+    template <class It, class BIt>
+    inline auto xbounded_iterator<It, BIt>::operator++() -> self_type&
+    {
+        ++m_it;
+        ++m_bound_it;
+        return *this;
+    }
+
+    template <class It, class BIt>
+    inline auto xbounded_iterator<It, BIt>::operator--() -> self_type&
+    {
+        --m_it;
+        --m_bound_it;
+        return *this;
+    }
+
+    template <class It, class BIt>
+    inline auto xbounded_iterator<It, BIt>::operator+=(difference_type n) -> self_type&
+    {
+        m_it += n;
+        m_bound_it += n;
+        return *this;
+    }
+
+    template <class It, class BIt>
+    inline auto xbounded_iterator<It, BIt>::operator-=(difference_type n) -> self_type&
+    {
+        m_it -= n;
+        m_bound_it -= n;
+        return *this;
+    }
+
+    template <class It, class BIt>
+    inline auto xbounded_iterator<It, BIt>::operator-(const self_type& rhs) const -> difference_type
+    {
+        return m_it - rhs.m_it;
+    }
+
+    // Dereference clamps the value to its bound: returns *m_it when it is
+    // strictly below *m_bound_it, otherwise bound - 1. Returns by value.
+    template <class It, class BIt>
+    inline auto xbounded_iterator<It, BIt>::operator*() const -> value_type
+    {
+        using type = decltype(*m_bound_it);
+        return (static_cast<type>(*m_it) < *m_bound_it) ? *m_it : static_cast<value_type>((*m_bound_it) - 1);
+    }
+
+    // Equality checks both iterators; ordering only compares the value
+    // iterator (the bound iterator moves in lockstep anyway).
+    template <class It, class BIt>
+    inline bool xbounded_iterator<It, BIt>::equal(const self_type& rhs) const
+    {
+        return m_it == rhs.m_it && m_bound_it == rhs.m_bound_it;
+    }
+
+    template <class It, class BIt>
+    inline bool xbounded_iterator<It, BIt>::less_than(const self_type& rhs) const
+    {
+        return m_it < rhs.m_it;
+    }
+
+    template <class It, class BIt>
+    inline bool operator==(const xbounded_iterator<It, BIt>& lhs, const xbounded_iterator<It, BIt>& rhs)
+    {
+        return lhs.equal(rhs);
+    }
+
+    template <class It, class BIt>
+    inline bool operator<(const xbounded_iterator<It, BIt>& lhs, const xbounded_iterator<It, BIt>& rhs)
+    {
+        return lhs.less_than(rhs);
+    }
+}
+}
+
+#endif

+ 104 - 0
3rd/numpy/include/xtensor/xlayout.hpp

@@ -0,0 +1,104 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_LAYOUT_HPP
+#define XTENSOR_LAYOUT_HPP
+
+#include <type_traits>
+
+// Do not include anything else here.
+// xlayout.hpp is included in xtensor_forward.hpp
+// and we don't want to bring other headers to it.
+#include "xtensor_config.hpp"
+
+namespace xt
+{
+    /*! layout_type enum for xcontainer based xexpressions */
+    enum class layout_type
+    {
+        /*! dynamic layout_type: you can resize to row major, column major, or use custom strides */
+        dynamic = 0x00,
+        /*! layout_type compatible with all others */
+        any = 0xFF,
+        /*! row major layout_type */
+        row_major = 0x01,
+        /*! column major layout_type */
+        column_major = 0x02
+    };
+
+    /**
+     * Implementation of the following logical table:
+     *
+     *        | d | a | r | c |
+     *      --+---+---+---+---+
+     *      d | d | d | d | d |
+     *      a | d | a | r | c |
+     *      r | d | r | r | d |
+     *      c | d | c | d | c |
+     *      d = dynamic, a = any, r = row_major, c = column_major.
+     *
+     * Using bitmasks to avoid nested if-else statements.
+     *
+     * @param args the input layouts.
+     * @return the output layout, computed with the previous logical table.
+     */
+    template <class... Args>
+    constexpr layout_type compute_layout(Args... args) noexcept;
+
+    constexpr layout_type default_assignable_layout(layout_type l) noexcept;
+
+    constexpr layout_type layout_remove_any(const layout_type layout) noexcept;
+
+    /******************
+     * Implementation *
+     ******************/
+
+    namespace detail
+    {
+        // Base case: no arguments -> 'any', the neutral element of the
+        // bitwise-AND combination (0xFF).
+        constexpr layout_type compute_layout_impl() noexcept
+        {
+            return layout_type::any;
+        }
+
+        constexpr layout_type compute_layout_impl(layout_type l) noexcept
+        {
+            return l;
+        }
+
+        // The table above is exactly bitwise AND of the enum values:
+        // any (0xFF) preserves the other operand, row (0x01) & column (0x02)
+        // gives dynamic (0x00), and dynamic absorbs everything.
+        constexpr layout_type compute_layout_impl(layout_type lhs, layout_type rhs) noexcept
+        {
+            using type = std::underlying_type_t<layout_type>;
+            return layout_type(static_cast<type>(lhs) & static_cast<type>(rhs));
+        }
+
+        // Left fold over the argument pack.
+        template <class... Args>
+        constexpr layout_type compute_layout_impl(layout_type lhs, Args... args) noexcept
+        {
+            return compute_layout_impl(lhs, compute_layout_impl(args...));
+        }
+    }
+
+    template <class... Args>
+    constexpr layout_type compute_layout(Args... args) noexcept
+    {
+        return detail::compute_layout_impl(args...);
+    }
+
+    // A concrete (row/column major) layout is assignable as-is; dynamic and
+    // any fall back to the build-time default layout.
+    constexpr layout_type default_assignable_layout(layout_type l) noexcept
+    {
+        return (l == layout_type::row_major || l == layout_type::column_major) ? l : XTENSOR_DEFAULT_LAYOUT;
+    }
+
+    // Resolve the wildcard 'any' to the build-time default; other layouts
+    // (including dynamic) pass through unchanged.
+    constexpr layout_type layout_remove_any(const layout_type layout) noexcept
+    {
+        return layout == layout_type::any ? XTENSOR_DEFAULT_LAYOUT : layout;
+    }
+}
+
+#endif

+ 1145 - 0
3rd/numpy/include/xtensor/xmanipulation.hpp

@@ -0,0 +1,1145 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_MANIPULATION_HPP
+#define XTENSOR_MANIPULATION_HPP
+
+#include <algorithm>
+#include <utility>
+
+#include <xtl/xcompare.hpp>
+#include <xtl/xsequence.hpp>
+
+#include "xbuilder.hpp"
+#include "xexception.hpp"
+#include "xrepeat.hpp"
+#include "xstrided_view.hpp"
+#include "xtensor_config.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+    /**
+     * @defgroup xt_xmanipulation
+     */
+
+    namespace check_policy
+    {
+        // Tag type: skip validation of user-provided axes / permutations.
+        struct none
+        {
+        };
+
+        // Tag type: fully validate user-provided axes / permutations
+        // (duplicates, out-of-range values) before applying them.
+        struct full
+        {
+        };
+    }
+
+    template <class E>
+    auto transpose(E&& e) noexcept;
+
+    template <class E, class S, class Tag = check_policy::none>
+    auto transpose(E&& e, S&& permutation, Tag check_policy = Tag());
+
+    template <class E>
+    auto swapaxes(E&& e, std::ptrdiff_t axis1, std::ptrdiff_t axis2);
+
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E>
+    auto ravel(E&& e);
+
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E>
+    auto flatten(E&& e);
+
+    template <layout_type L, class T>
+    auto flatnonzero(const T& arr);
+
+    template <class E>
+    auto trim_zeros(E&& e, const std::string& direction = "fb");
+
+    template <class E>
+    auto squeeze(E&& e);
+
+    template <class E, class S, class Tag = check_policy::none, std::enable_if_t<!xtl::is_integral<S>::value, int> = 0>
+    auto squeeze(E&& e, S&& axis, Tag check_policy = Tag());
+
+    template <class E>
+    auto expand_dims(E&& e, std::size_t axis);
+
+    template <std::size_t N, class E>
+    auto atleast_Nd(E&& e);
+
+    template <class E>
+    auto atleast_1d(E&& e);
+
+    template <class E>
+    auto atleast_2d(E&& e);
+
+    template <class E>
+    auto atleast_3d(E&& e);
+
+    template <class E>
+    auto split(E& e, std::size_t n, std::size_t axis = 0);
+
+    template <class E>
+    auto hsplit(E& e, std::size_t n);
+
+    template <class E>
+    auto vsplit(E& e, std::size_t n);
+
+    template <class E>
+    auto flip(E&& e);
+
+    template <class E>
+    auto flip(E&& e, std::size_t axis);
+
+    template <std::ptrdiff_t N = 1, class E>
+    auto rot90(E&& e, const std::array<std::ptrdiff_t, 2>& axes = {0, 1});
+
+    template <class E>
+    auto roll(E&& e, std::ptrdiff_t shift);
+
+    template <class E>
+    auto roll(E&& e, std::ptrdiff_t shift, std::ptrdiff_t axis);
+
+    template <class E>
+    auto repeat(E&& e, std::size_t repeats, std::size_t axis);
+
+    template <class E>
+    auto repeat(E&& e, const std::vector<std::size_t>& repeats, std::size_t axis);
+
+    template <class E>
+    auto repeat(E&& e, std::vector<std::size_t>&& repeats, std::size_t axis);
+
+    /****************************
+     * transpose implementation *
+     ****************************/
+
+    namespace detail
+    {
+        // Swap row_major <-> column_major; any other layout is returned
+        // unchanged (hence noexcept, unlike transpose_layout below).
+        inline layout_type transpose_layout_noexcept(layout_type l) noexcept
+        {
+            layout_type result = l;
+            if (l == layout_type::row_major)
+            {
+                result = layout_type::column_major;
+            }
+            else if (l == layout_type::column_major)
+            {
+                result = layout_type::row_major;
+            }
+            return result;
+        }
+
+        // Throwing variant: only row_major / column_major have a well-defined
+        // transposed layout.
+        inline layout_type transpose_layout(layout_type l)
+        {
+            if (l != layout_type::row_major && l != layout_type::column_major)
+            {
+                XTENSOR_THROW(transpose_error, "cannot compute transposed layout of dynamic layout");
+            }
+            return transpose_layout_noexcept(l);
+        }
+
+        // Unchecked transposition: builds a strided view whose shape/strides
+        // are the input's, reordered by `permutation`. Only validates the
+        // permutation's size and range, not uniqueness (see the full overload).
+        template <class E, class S>
+        inline auto transpose_impl(E&& e, S&& permutation, check_policy::none)
+        {
+            if (sequence_size(permutation) != e.dimension())
+            {
+                XTENSOR_THROW(transpose_error, "Permutation does not have the same size as shape");
+            }
+
+            // permute stride and shape
+            using shape_type = xindex_type_t<typename std::decay_t<E>::shape_type>;
+            shape_type temp_shape;
+            resize_container(temp_shape, e.shape().size());
+
+            using strides_type = get_strides_t<shape_type>;
+            strides_type temp_strides;
+            resize_container(temp_strides, e.strides().size());
+
+            using size_type = typename std::decay_t<E>::size_type;
+            for (std::size_t i = 0; i < e.shape().size(); ++i)
+            {
+                if (std::size_t(permutation[i]) >= e.dimension())
+                {
+                    XTENSOR_THROW(transpose_error, "Permutation contains wrong axis");
+                }
+                size_type perm = static_cast<size_type>(permutation[i]);
+                temp_shape[i] = e.shape()[perm];
+                temp_strides[i] = e.strides()[perm];
+            }
+
+            // Identity permutation keeps the layout; a full reversal flips
+            // row_major <-> column_major; anything else becomes dynamic.
+            layout_type new_layout = layout_type::dynamic;
+            if (std::is_sorted(std::begin(permutation), std::end(permutation)))
+            {
+                // keep old layout
+                new_layout = e.layout();
+            }
+            else if (std::is_sorted(std::begin(permutation), std::end(permutation), std::greater<>()))
+            {
+                new_layout = transpose_layout_noexcept(e.layout());
+            }
+
+            return strided_view(
+                std::forward<E>(e),
+                std::move(temp_shape),
+                std::move(temp_strides),
+                get_offset<XTENSOR_DEFAULT_LAYOUT>(e),
+                new_layout
+            );
+        }
+
+        // Checked transposition: additionally rejects permutations that list
+        // the same axis twice (O(n^2) scan, n is the dimension count).
+        template <class E, class S>
+        inline auto transpose_impl(E&& e, S&& permutation, check_policy::full)
+        {
+            // check if axis appears twice in permutation
+            for (std::size_t i = 0; i < sequence_size(permutation); ++i)
+            {
+                for (std::size_t j = i + 1; j < sequence_size(permutation); ++j)
+                {
+                    if (permutation[i] == permutation[j])
+                    {
+                        XTENSOR_THROW(transpose_error, "Permutation contains axis more than once");
+                    }
+                }
+            }
+            return transpose_impl(std::forward<E>(e), std::forward<S>(permutation), check_policy::none());
+        }
+
+        // When E exposes a data interface, the transposed strides are simply
+        // the original strides read backwards.
+        template <class E, class S, class X, std::enable_if_t<has_data_interface<std::decay_t<E>>::value>* = nullptr>
+        inline void compute_transposed_strides(E&& e, const S&, X& strides)
+        {
+            std::copy(e.strides().crbegin(), e.strides().crend(), strides.begin());
+        }
+
+        template <class E, class S, class X, std::enable_if_t<!has_data_interface<std::decay_t<E>>::value>* = nullptr>
+        inline void compute_transposed_strides(E&&, const S& shape, X& strides)
+        {
+            // In the case where E does not have a data interface, the transposition
+            // makes use of a flat storage adaptor that has layout XTENSOR_DEFAULT_TRAVERSAL
+            // which should be the one inverted.
+            layout_type l = transpose_layout(XTENSOR_DEFAULT_TRAVERSAL);
+            compute_strides(shape, l, strides);
+        }
+    }
+
+    /**
+     * Returns a transpose view by reversing the dimensions of xexpression e
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input expression
+     */
+    template <class E>
+    inline auto transpose(E&& e) noexcept
+    {
+        using shape_type = xindex_type_t<typename std::decay_t<E>::shape_type>;
+        const auto dim = e.shape().size();
+
+        // The transposed shape is the original shape read backwards.
+        shape_type reversed_shape;
+        resize_container(reversed_shape, dim);
+        std::reverse_copy(e.shape().cbegin(), e.shape().cend(), reversed_shape.begin());
+
+        // Strides are reversed as well, or recomputed when e has no data
+        // interface (see detail::compute_transposed_strides).
+        get_strides_t<shape_type> reversed_strides;
+        resize_container(reversed_strides, dim);
+        detail::compute_transposed_strides(e, reversed_shape, reversed_strides);
+
+        return strided_view(
+            std::forward<E>(e),
+            std::move(reversed_shape),
+            std::move(reversed_strides),
+            detail::get_offset<XTENSOR_DEFAULT_TRAVERSAL>(e),
+            detail::transpose_layout_noexcept(e.layout())
+        );
+    }
+
+    /**
+     * Returns a transpose view by permuting the xexpression e with @p permutation.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input expression
+     * @param permutation the sequence containing permutation
+     * @param check_policy the check level (check_policy::full() or check_policy::none())
+     * @tparam Tag selects the level of error checking on permutation vector defaults to check_policy::none.
+     */
+    template <class E, class S, class Tag>
+    inline auto transpose(E&& e, S&& permutation, Tag check_policy)
+    {
+        // Dispatch on the tag type (check_policy::none or check_policy::full).
+        return detail::transpose_impl(std::forward<E>(e), std::forward<S>(permutation), check_policy);
+    }
+
+    /// @cond DOXYGEN_INCLUDE_SFINAE
+    template <class E, class I, std::size_t N, class Tag = check_policy::none>
+    inline auto transpose(E&& e, const I (&permutation)[N], Tag check_policy = Tag())
+    {
+        // Overload accepting a braced initializer / C array as the permutation.
+        return detail::transpose_impl(std::forward<E>(e), permutation, check_policy);
+    }
+
+    /// @endcond
+
+    /*****************************
+     *  swapaxes implementation  *
+     *****************************/
+
+    namespace detail
+    {
+        // Build the permutation that exchanges the two (normalized) axes and
+        // leaves every other axis in place.
+        template <class S>
+        inline S swapaxes_perm(std::size_t dim, std::ptrdiff_t axis1, std::ptrdiff_t axis2)
+        {
+            using id_t = typename S::value_type;
+            // Start from the identity permutation ...
+            auto perm = xtl::make_sequence<S>(dim, 0);
+            std::iota(perm.begin(), perm.end(), id_t(0));
+            // ... then swap the two requested entries (no-op when they match).
+            std::swap(perm[normalize_axis(dim, axis1)], perm[normalize_axis(dim, axis2)]);
+            return perm;
+        }
+    }
+
+    /**
+     * Return a new expression with two axes interchanged.
+     *
+     * The two axis parameters @p axis1 and @p axis2 are interchangeable.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e The input expression
+     * @param axis1 First axis to swap
+     * @param axis2 Second axis to swap
+     */
+    template <class E>
+    inline auto swapaxes(E&& e, std::ptrdiff_t axis1, std::ptrdiff_t axis2)
+    {
+        using strides_t = get_strides_t<typename std::decay_t<E>::shape_type>;
+        const auto dim = e.dimension();
+        // Validate both axes against the expression's dimension before
+        // building the swap permutation.
+        check_axis_in_dim(axis1, dim, "Parameter axis1");
+        check_axis_in_dim(axis2, dim, "Parameter axis2");
+        return transpose(std::forward<E>(e), detail::swapaxes_perm<strides_t>(dim, axis1, axis2));
+    }
+
+    /*****************************
+     *  moveaxis implementation  *
+     *****************************/
+
+    namespace detail
+    {
+        // Build the permutation that moves axis `src` to position `dest`,
+        // shifting the remaining axes while preserving their relative order.
+        template <class S>
+        inline S moveaxis_perm(std::size_t dim, std::ptrdiff_t src, std::ptrdiff_t dest)
+        {
+            using id_t = typename S::value_type;
+
+            const std::size_t src_norm = normalize_axis(dim, src);
+            const std::size_t dest_norm = normalize_axis(dim, dest);
+
+            // Initializing to src_norm handles case where `dest == -1` and the loop
+            // does not go check `perm_idx == dest_norm` a `dim+1`th time.
+            auto perm = xtl::make_sequence<S>(dim, src_norm);
+            id_t perm_idx = 0;
+            for (id_t i = 0; xtl::cmp_less(i, dim); ++i)
+            {
+                // When the write cursor reaches the destination slot, insert
+                // the moved axis there first.
+                if (xtl::cmp_equal(perm_idx, dest_norm))
+                {
+                    perm[perm_idx] = src_norm;
+                    ++perm_idx;
+                }
+                // Every axis except the moved one is appended in order.
+                if (xtl::cmp_not_equal(i, src_norm))
+                {
+                    perm[perm_idx] = i;
+                    ++perm_idx;
+                }
+            }
+            return perm;
+        }
+    }
+
+    /**
+     * Return a new expression with an axis move to a new position.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e The input expression
+     * @param src Original position of the axis to move
+     * @param dest Destination position for the original axis.
+     */
+    template <class E>
+    inline auto moveaxis(E&& e, std::ptrdiff_t src, std::ptrdiff_t dest)
+    {
+        using strides_t = get_strides_t<typename std::decay_t<E>::shape_type>;
+        const auto dim = e.dimension();
+        // Validate both positions before computing the permutation.
+        check_axis_in_dim(src, dim, "Parameter src");
+        check_axis_in_dim(dest, dim, "Parameter dest");
+        return xt::transpose(std::forward<E>(e), detail::moveaxis_perm<strides_t>(dim, src, dest));
+    }
+
+    /************************************
+     * ravel and flatten implementation *
+     ************************************/
+
+    namespace detail
+    {
+        // Adapter used by ravel: exposes begin/cbegin/size of an expression
+        // with a fixed traversal layout L, so the expression's iteration can
+        // be wrapped in a flat iterator adaptor.
+        template <class E, layout_type L>
+        struct expression_iterator_getter
+        {
+            using iterator = decltype(std::declval<E>().template begin<L>());
+            using const_iterator = decltype(std::declval<E>().template cbegin<L>());
+
+            inline static iterator begin(E& e)
+            {
+                return e.template begin<L>();
+            }
+
+            inline static const_iterator cbegin(E& e)
+            {
+                return e.template cbegin<L>();
+            }
+
+            inline static auto size(E& e)
+            {
+                return e.size();
+            }
+        };
+    }
+
+    /**
+     * Return a flatten view of the given expression. No copy is made.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input expression
+     * @tparam L the layout used to read the elements of e.
+     *     If no parameter is specified, XTENSOR_DEFAULT_TRAVERSAL is used.
+     * @tparam E the type of the expression
+     */
+    template <layout_type L, class E>
+    inline auto ravel(E&& e)
+    {
+        using iterator = decltype(e.template begin<L>());
+        using iterator_getter = detail::expression_iterator_getter<std::remove_reference_t<E>, L>;
+        // Capture the size before e is forwarded into the adaptor.
+        auto size = e.size();
+        auto adaptor = make_xiterator_adaptor(std::forward<E>(e), iterator_getter());
+        // Only when iteration is through a raw pointer can the requested
+        // layout L be reported for the 1-D view; otherwise it is dynamic.
+        constexpr layout_type layout = std::is_pointer<iterator>::value ? L : layout_type::dynamic;
+        using type = xtensor_view<decltype(adaptor), 1, layout, extension::get_expression_tag_t<E>>;
+        return type(std::move(adaptor), {size});
+    }
+
+    /**
+     * Return a flatten view of the given expression.
+     *
+     * No copy is made.
+     * This method is equivalent to ravel and is provided for API parity with NumPy.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input expression
+     * @tparam L the layout used to read the elements of e.
+     *     If no parameter is specified, XTENSOR_DEFAULT_TRAVERSAL is used.
+     * @tparam E the type of the expression
+     * @sa ravel
+     */
+    template <layout_type L, class E>
+    inline auto flatten(E&& e)
+    {
+        // Thin alias over ravel, kept for NumPy-style naming.
+        return ravel<L>(std::forward<E>(e));
+    }
+
+    /**
+     * Return indices that are non-zero in the flattened version of arr.
+     *
+     * Equivalent to ``nonzero(ravel<layout_type>(arr))[0];``
+     *
+     * @param arr input array
+     * @return indices that are non-zero in the flattened version of arr
+     */
+    template <layout_type L, class T>
+    inline auto flatnonzero(const T& arr)
+    {
+        // nonzero returns one index container per dimension; the raveled
+        // input is 1-D, so only the first component is relevant.
+        return nonzero(ravel<L>(arr))[0];
+    }
+
+    /*****************************
+     * trim_zeros implementation *
+     *****************************/
+
+    /**
+     * Trim zeros at beginning, end or both of 1D sequence.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e input xexpression
+     * @param direction string of either 'f' for trim from beginning, 'b' for trim from end
+     *                  or 'fb' (default) for both.
+     * @return returns a view without zeros at the beginning and end
+     */
+    template <class E>
+    inline auto trim_zeros(E&& e, const std::string& direction)
+    {
+        XTENSOR_ASSERT_MSG(e.dimension() == 1, "Dimension for trim_zeros has to be 1.");
+
+        auto is_nonzero = [](const auto& v)
+        {
+            return v != 0;
+        };
+
+        std::ptrdiff_t first = 0;
+        std::ptrdiff_t last = static_cast<std::ptrdiff_t>(e.size());
+
+        // 'f' in direction: advance past leading zeros.
+        if (direction.find("f") != std::string::npos)
+        {
+            first = std::distance(e.cbegin(), std::find_if(e.cbegin(), e.cend(), is_nonzero));
+        }
+
+        // 'b' in direction: drop trailing zeros, unless the front scan
+        // already consumed everything (all-zero input).
+        if (direction.find("b") != std::string::npos && first != last)
+        {
+            last -= std::distance(e.crbegin(), std::find_if(e.crbegin(), e.crend(), is_nonzero));
+        }
+
+        return strided_view(std::forward<E>(e), {range(first, last)});
+    }
+
+    /**************************
+     * squeeze implementation *
+     **************************/
+
+    /**
+     * Returns a squeeze view of the given expression.
+     *
+     * No copy is made. Squeezing an expression removes dimensions of extent 1.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input expression
+     * @tparam E the type of the expression
+     */
+    template <class E>
+    inline auto squeeze(E&& e)
+    {
+        dynamic_shape<std::size_t> new_shape;
+        dynamic_shape<std::ptrdiff_t> new_strides;
+        // Keep only the extents different from 1.
+        std::copy_if(
+            e.shape().cbegin(),
+            e.shape().cend(),
+            std::back_inserter(new_shape),
+            [](std::size_t i)
+            {
+                return i != 1;
+            }
+        );
+        decltype(auto) old_strides = detail::get_strides<XTENSOR_DEFAULT_LAYOUT>(e);
+        // Strides of value 0 are dropped in parallel; this presumably relies
+        // on singleton dimensions carrying a 0 stride in xtensor's scheme --
+        // NOTE(review): confirm this holds for broadcast views, where a
+        // non-singleton dimension could also have a 0 stride.
+        std::copy_if(
+            old_strides.cbegin(),
+            old_strides.cend(),
+            std::back_inserter(new_strides),
+            [](std::ptrdiff_t i)
+            {
+                return i != 0;
+            }
+        );
+
+        return strided_view(std::forward<E>(e), std::move(new_shape), std::move(new_strides), 0, e.layout());
+    }
+
+    namespace detail
+    {
+        // Unchecked implementation: build new shape/strides, skipping every
+        // dimension whose index appears in `axis`.
+        template <class E, class S>
+        inline auto squeeze_impl(E&& e, S&& axis, check_policy::none)
+        {
+            std::size_t new_dim = e.dimension() - axis.size();
+            dynamic_shape<std::size_t> new_shape(new_dim);
+            dynamic_shape<std::ptrdiff_t> new_strides(new_dim);
+
+            decltype(auto) old_strides = detail::get_strides<XTENSOR_DEFAULT_LAYOUT>(e);
+
+            for (std::size_t i = 0, ix = 0; i < e.dimension(); ++i)
+            {
+                // Keep dimension i only if it is not listed in `axis`.
+                if (axis.cend() == std::find(axis.cbegin(), axis.cend(), i))
+                {
+                    new_shape[ix] = e.shape()[i];
+                    new_strides[ix++] = old_strides[i];
+                }
+            }
+
+            return strided_view(std::forward<E>(e), std::move(new_shape), std::move(new_strides), 0, e.layout());
+        }
+
+        // Checked implementation: validate each axis, then forward to the
+        // unchecked overload.
+        template <class E, class S>
+        inline auto squeeze_impl(E&& e, S&& axis, check_policy::full)
+        {
+            for (auto ix : axis)
+            {
+                // Fixed off-by-one: the previous `>` comparison accepted
+                // ix == e.dimension(), making the shape access below read out
+                // of range. Valid axes are in [0, dimension).
+                if (static_cast<std::size_t>(ix) >= e.dimension())
+                {
+                    XTENSOR_THROW(std::runtime_error, "Axis argument to squeeze > dimension of expression");
+                }
+                if (e.shape()[static_cast<std::size_t>(ix)] != 1)
+                {
+                    XTENSOR_THROW(std::runtime_error, "Trying to squeeze axis != 1");
+                }
+            }
+            return squeeze_impl(std::forward<E>(e), std::forward<S>(axis), check_policy::none());
+        }
+    }
+
+    /**
+     * Remove single-dimensional entries from the shape of an xexpression
+     *
+     * @ingroup xt_xmanipulation
+     * @param e input xexpression
+     * @param axis integer or container of integers, select a subset of single-dimensional
+     *        entries of the shape.
+     * @param check_policy select check_policy. With check_policy::full(), selecting an axis
+     *        which is greater than one will throw a runtime_error.
+     */
+    template <class E, class S, class Tag, std::enable_if_t<!xtl::is_integral<S>::value, int>>
+    inline auto squeeze(E&& e, S&& axis, Tag check_policy)
+    {
+        // Container-of-axes overload; dispatches on the check_policy tag.
+        return detail::squeeze_impl(std::forward<E>(e), std::forward<S>(axis), check_policy);
+    }
+
+    /// @cond DOXYGEN_INCLUDE_SFINAE
+    template <class E, class I, std::size_t N, class Tag = check_policy::none>
+    inline auto squeeze(E&& e, const I (&axis)[N], Tag check_policy = Tag())
+    {
+        // C-array / braced-initializer overload: copy the axes into a
+        // std::array so the generic implementation can iterate over them.
+        using arr_t = std::array<I, N>;
+        return detail::squeeze_impl(
+            std::forward<E>(e),
+            xtl::forward_sequence<arr_t, decltype(axis)>(axis),
+            check_policy
+        );
+    }
+
+    template <class E, class Tag = check_policy::none>
+    inline auto squeeze(E&& e, std::size_t axis, Tag check_policy = Tag())
+    {
+        // Single-axis convenience: wrap the axis in a one-element array.
+        return squeeze(std::forward<E>(e), std::array<std::size_t, 1>{axis}, check_policy);
+    }
+
+    /// @endcond
+
+    /******************************
+     * expand_dims implementation *
+     ******************************/
+
+    /**
+     * Expand the shape of an xexpression.
+     *
+     * Insert a new axis that will appear at the axis position in the expanded array shape.
+     * This will return a ``strided_view`` with a ``xt::newaxis()`` at the indicated axis.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e input xexpression
+     * @param axis axis to expand
+     * @return returns a ``strided_view`` with expanded dimension
+     */
+    template <class E>
+    inline auto expand_dims(E&& e, std::size_t axis)
+    {
+        // One slice per output dimension: all() everywhere except the
+        // requested position, which receives the inserted newaxis.
+        xstrided_slice_vector slices(e.dimension() + 1, all());
+        slices[axis] = newaxis();
+        return strided_view(std::forward<E>(e), std::move(slices));
+    }
+
+    /*****************************
+     * atleast_Nd implementation *
+     *****************************/
+
+    /**
+     * Expand dimensions of xexpression to at least `N`
+     *
+     * This adds ``newaxis()`` slices to a ``strided_view`` until
+     * the dimension of the view reaches at least `N`.
+     * Note: dimensions are added equally at the beginning and the end.
+     * For example, a 1-D array of shape (N,) becomes a view of shape (1, N, 1).
+     *
+     * @ingroup xt_xmanipulation
+     * @param e input xexpression
+     * @tparam N the number of requested dimensions
+     * @return ``strided_view`` with expanded dimensions
+     */
+    template <std::size_t N, class E>
+    inline auto atleast_Nd(E&& e)
+    {
+        xstrided_slice_vector sv((std::max)(e.dimension(), N), all());
+        if (e.dimension() < N)
+        {
+            // Split the missing axes between front and back:
+            // round((N - dim) / N) of them go in front, the rest at the end
+            // (e.g. 1-D -> 3-D inserts one axis on each side).
+            std::size_t i = 0;
+            std::size_t end = static_cast<std::size_t>(std::round(double(N - e.dimension()) / double(N)));
+            for (; i < end; ++i)
+            {
+                sv[i] = newaxis();
+            }
+            // Skip over the original dimensions (which keep their all() slice).
+            i += e.dimension();
+            for (; i < N; ++i)
+            {
+                sv[i] = newaxis();
+            }
+        }
+        return strided_view(std::forward<E>(e), std::move(sv));
+    }
+
+    /**
+     * Expand to at least 1D
+     *
+     * @ingroup xt_xmanipulation
+     * @sa atleast_Nd
+     */
+    template <class E>
+    inline auto atleast_1d(E&& e)
+    {
+        // Delegates to the generic N-dimensional expansion with N = 1.
+        return atleast_Nd<1>(std::forward<E>(e));
+    }
+
+    /**
+     * Expand to at least 2D
+     *
+     * @ingroup xt_xmanipulation
+     * @sa atleast_Nd
+     */
+    template <class E>
+    inline auto atleast_2d(E&& e)
+    {
+        // Delegates to the generic N-dimensional expansion with N = 2.
+        return atleast_Nd<2>(std::forward<E>(e));
+    }
+
+    /**
+     * Expand to at least 3D
+     *
+     * @ingroup xt_xmanipulation
+     * @sa atleast_Nd
+     */
+    template <class E>
+    inline auto atleast_3d(E&& e)
+    {
+        // Delegates to the generic N-dimensional expansion with N = 3.
+        return atleast_Nd<3>(std::forward<E>(e));
+    }
+
+    /************************
+     * split implementation *
+     ************************/
+
+    /**
+     * Split xexpression along axis into subexpressions
+     *
+     * This splits an xexpression along the axis in `n` equal parts and
+     * returns a vector of ``strided_view``.
+     * Calling split with axis > dimension of e or an `n` that does not result in
+     * an equal division of the xexpression will throw a runtime_error.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e input xexpression
+     * @param n number of elements to return
+     * @param axis axis along which to split the expression
+     */
+    template <class E>
+    inline auto split(E& e, std::size_t n, std::size_t axis)
+    {
+        if (axis >= e.dimension())
+        {
+            XTENSOR_THROW(std::runtime_error, "Split along axis > dimension.");
+        }
+
+        const std::size_t ax_sz = e.shape()[axis];
+        // Guard n == 0 (previously undefined behavior: division by zero) and
+        // reject sizes that do not divide evenly, before building any view.
+        if (n == std::size_t(0) || ax_sz % n != std::size_t(0))
+        {
+            XTENSOR_THROW(std::runtime_error, "Split does not result in equal division.");
+        }
+        const std::size_t step = ax_sz / n;
+
+        xstrided_slice_vector sv(e.dimension(), all());
+        std::vector<decltype(strided_view(e, sv))> result;
+        result.reserve(n);  // one view per part; avoids vector reallocations
+        for (std::size_t i = 0; i < n; ++i)
+        {
+            // Part i covers [i * step, (i + 1) * step) along `axis`.
+            sv[axis] = range(i * step, (i + 1) * step);
+            result.emplace_back(strided_view(e, sv));
+        }
+        return result;
+    }
+
+    /**
+     * Split an xexpression into subexpressions horizontally (column-wise)
+     *
+     * This method is equivalent to ``split(e, n, 1)``.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e input xexpression
+     * @param n number of elements to return
+     */
+    template <class E>
+    inline auto hsplit(E& e, std::size_t n)
+    {
+        // Column-wise split: axis 1.
+        return split(e, n, std::size_t(1));
+    }
+
+    /**
+     * Split an xexpression into subexpressions vertically (row-wise)
+     *
+     * This method is equivalent to ``split(e, n, 0)``.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e input xexpression
+     * @param n number of elements to return
+     */
+    template <class E>
+    inline auto vsplit(E& e, std::size_t n)
+    {
+        // Row-wise split: axis 0.
+        return split(e, n, std::size_t(0));
+    }
+
+    /***********************
+     * flip implementation *
+     ***********************/
+
+    /**
+     * Reverse the order of elements in an xexpression along every axis.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input xexpression
+     * @return returns a view with the result of the flip.
+     */
+    template <class E>
+    inline auto flip(E&& e)
+    {
+        using size_type = typename std::decay_t<E>::size_type;
+        // Flip along axis 0, then fold each remaining axis into the same
+        // view object. NOTE(review): `r = flip(r, d)` assigns an expression
+        // that aliases r back into r; this presumably relies on the view's
+        // semantic assignment evaluating through a temporary -- confirm
+        // against the strided_view assignment semantics.
+        auto r = flip(e, 0);
+        for (size_type d = 1; d < e.dimension(); ++d)
+        {
+            r = flip(r, d);
+        }
+        return r;
+    }
+
+    /**
+     * Reverse the order of elements in an xexpression along the given axis.
+     *
+     * Note: A NumPy/Matlab style `flipud(arr)` is equivalent to `xt::flip(arr, 0)`,
+     * `fliplr(arr)` to `xt::flip(arr, 1)`.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input xexpression
+     * @param axis the axis along which elements should be reversed
+     * @return returns a view with the result of the flip
+     */
+    template <class E>
+    inline auto flip(E&& e, std::size_t axis)
+    {
+        using shape_type = xindex_type_t<typename std::decay_t<E>::shape_type>;
+
+        // The flipped view keeps the same shape ...
+        shape_type shape;
+        resize_container(shape, e.shape().size());
+        std::copy(e.shape().cbegin(), e.shape().cend(), shape.begin());
+
+        get_strides_t<shape_type> strides;
+        decltype(auto) old_strides = detail::get_strides<XTENSOR_DEFAULT_LAYOUT>(e);
+        resize_container(strides, old_strides.size());
+        std::copy(old_strides.cbegin(), old_strides.cend(), strides.begin());
+
+        // ... but negates the stride of `axis` and starts the view at the
+        // last element along that axis. NOTE(review): e.data_offset() is
+        // called directly, so e is expected to expose a data interface --
+        // confirm for non-container expressions.
+        strides[axis] *= -1;
+        std::size_t offset = static_cast<std::size_t>(
+            static_cast<std::ptrdiff_t>(e.data_offset())
+            + old_strides[axis] * (static_cast<std::ptrdiff_t>(e.shape()[axis]) - 1)
+        );
+
+        return strided_view(std::forward<E>(e), std::move(shape), std::move(strides), offset);
+    }
+
+    /************************
+     * rot90 implementation *
+     ************************/
+
+    namespace detail
+    {
+        // One specialization per rotation count (0..3 quarter turns).
+        template <std::ptrdiff_t N>
+        struct rot90_impl;
+
+        // 0 turns: identity.
+        template <>
+        struct rot90_impl<0>
+        {
+            template <class E>
+            inline auto operator()(E&& e, const std::array<std::size_t, 2>& /*axes*/)
+            {
+                return std::forward<E>(e);
+            }
+        };
+
+        // 1 turn: flip along the second axis, then swap the two axes.
+        template <>
+        struct rot90_impl<1>
+        {
+            template <class E>
+            inline auto operator()(E&& e, const std::array<std::size_t, 2>& axes)
+            {
+                using std::swap;
+
+                // Identity permutation with the two rotation axes exchanged.
+                dynamic_shape<std::ptrdiff_t> axes_list(e.shape().size());
+                std::iota(axes_list.begin(), axes_list.end(), 0);
+                swap(axes_list[axes[0]], axes_list[axes[1]]);
+
+                return transpose(flip(std::forward<E>(e), axes[1]), std::move(axes_list));
+            }
+        };
+
+        // 2 turns: flip along both axes.
+        template <>
+        struct rot90_impl<2>
+        {
+            template <class E>
+            inline auto operator()(E&& e, const std::array<std::size_t, 2>& axes)
+            {
+                return flip(flip(std::forward<E>(e), axes[0]), axes[1]);
+            }
+        };
+
+        // 3 turns: swap the two axes, then flip along the second axis
+        // (inverse order of the single-turn case).
+        template <>
+        struct rot90_impl<3>
+        {
+            template <class E>
+            inline auto operator()(E&& e, const std::array<std::size_t, 2>& axes)
+            {
+                using std::swap;
+
+                dynamic_shape<std::ptrdiff_t> axes_list(e.shape().size());
+                std::iota(axes_list.begin(), axes_list.end(), 0);
+                swap(axes_list[axes[0]], axes_list[axes[1]]);
+
+                return flip(transpose(std::forward<E>(e), std::move(axes_list)), axes[1]);
+            }
+        };
+    }
+
+    /**
+     * Rotate an array by 90 degrees in the plane specified by axes.
+     *
+     * Rotation direction is from the first towards the second axis.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input xexpression
+     * @param axes the array is rotated in the plane defined by the axes. Axes must be different.
+     * @tparam N number of times the array is rotated by 90 degrees. Default is 1.
+     * @return returns a view with the result of the rotation
+     */
+    template <std::ptrdiff_t N, class E>
+    inline auto rot90(E&& e, const std::array<std::ptrdiff_t, 2>& axes)
+    {
+        auto ndim = static_cast<std::ptrdiff_t>(e.shape().size());
+
+        // Reject equal axes, including the case where one is the negative
+        // alias of the other (difference of exactly ndim).
+        if (axes[0] == axes[1] || std::abs(axes[0] - axes[1]) == ndim)
+        {
+            XTENSOR_THROW(std::runtime_error, "Axes must be different");
+        }
+
+        auto norm_axes = forward_normalize<std::array<std::size_t, 2>>(e, axes);
+        // Map any N (including negative) to a rotation count in [0, 3].
+        constexpr std::ptrdiff_t n = (4 + (N % 4)) % 4;
+
+        return detail::rot90_impl<n>()(std::forward<E>(e), norm_axes);
+    }
+
+    /***********************
+     * roll implementation *
+     ***********************/
+
+    /**
+     * Roll an expression.
+     *
+     * The expression is flattened before shifting, after which the original
+     * shape is restored. Elements that roll beyond the last position are
+     * re-introduced at the first. This function does not change the input
+     * expression.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input xexpression
+     * @param shift the number of places by which elements are shifted
+     * @return a roll of the input expression
+     */
+    template <class E>
+    inline auto roll(E&& e, std::ptrdiff_t shift)
+    {
+        auto cpy = empty_like(e);
+        auto flat_size = std::accumulate(
+            cpy.shape().begin(),
+            cpy.shape().end(),
+            1L,
+            std::multiplies<std::size_t>()
+        );
+        while (shift < 0)
+        {
+            shift += flat_size;
+        }
+
+        shift %= flat_size;
+        std::copy(e.begin(), e.end() - shift, std::copy(e.end() - shift, e.end(), cpy.begin()));
+
+        return cpy;
+    }
+
+    namespace detail
+    {
+        /**
+         * Algorithm adapted from pythran/pythonic/numpy/roll.hpp
+         *
+         * Recursively walks the dimensions of the expression, writing the
+         * rolled elements of the sub-tensor that starts at `from` into `to`.
+         *
+         * @param to output iterator for the destination
+         * @param from iterator at the start of the current sub-tensor
+         * @param shift number of places to shift along @p axis
+         *     (assumed already normalized into [0, shape[axis]))
+         * @param axis the axis being rolled
+         * @param shape shape of the whole expression
+         * @param M index of the dimension currently being processed
+         * @return output iterator one past the last element written
+         */
+
+        template <class To, class From, class S>
+        To roll(To to, From from, std::ptrdiff_t shift, std::size_t axis, const S& shape, std::size_t M)
+        {
+            std::ptrdiff_t dim = std::ptrdiff_t(shape[M]);
+            // Number of elements spanned by one index step in dimension M,
+            // i.e. the product of all trailing extents.
+            std::ptrdiff_t offset = std::accumulate(
+                shape.begin() + M + 1,
+                shape.end(),
+                std::ptrdiff_t(1),
+                std::multiplies<std::ptrdiff_t>()
+            );
+            if (shape.size() == M + 1)
+            {
+                // Innermost dimension: copy elements directly. When rolling
+                // this axis, emit the tail [dim - shift, dim) first, then the
+                // head [0, dim - shift).
+                if (axis == M)
+                {
+                    const auto split = from + (dim - shift) * offset;
+                    for (auto iter = split, end = from + dim * offset; iter != end; iter += offset, ++to)
+                    {
+                        *to = *iter;
+                    }
+                    for (auto iter = from, end = split; iter != end; iter += offset, ++to)
+                    {
+                        *to = *iter;
+                    }
+                }
+                else
+                {
+                    for (auto iter = from, end = from + dim * offset; iter != end; iter += offset, ++to)
+                    {
+                        *to = *iter;
+                    }
+                }
+            }
+            else
+            {
+                // Outer dimension: recurse into each slice, visiting them in
+                // rolled order when this is the rolled axis.
+                if (axis == M)
+                {
+                    const auto split = from + (dim - shift) * offset;
+                    for (auto iter = split, end = from + dim * offset; iter != end; iter += offset)
+                    {
+                        to = roll(to, iter, shift, axis, shape, M + 1);
+                    }
+                    for (auto iter = from, end = split; iter != end; iter += offset)
+                    {
+                        to = roll(to, iter, shift, axis, shape, M + 1);
+                    }
+                }
+                else
+                {
+                    for (auto iter = from, end = from + dim * offset; iter != end; iter += offset)
+                    {
+                        to = roll(to, iter, shift, axis, shape, M + 1);
+                    }
+                }
+            }
+            return to;
+        }
+    }
+
+    /**
+     * Roll an expression along a given axis.
+     *
+     * Elements that roll beyond the last position are re-introduced at the first.
+     * This function does not change the input expression.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input xexpression
+     * @param shift the number of places by which elements are shifted
+     *     (may be negative or larger than the axis extent)
+     * @param axis the axis along which elements are shifted. Negative values
+     *     count from the last axis, as in NumPy.
+     * @return a roll of the input expression
+     */
+    template <class E>
+    inline auto roll(E&& e, std::ptrdiff_t shift, std::ptrdiff_t axis)
+    {
+        auto cpy = empty_like(e);
+        const auto& shape = cpy.shape();
+        // Normalize a negative axis *before* converting it to an unsigned
+        // index. Casting first would turn a valid negative axis into a huge
+        // unsigned value and make the bounds check below always throw.
+        if (axis < 0)
+        {
+            axis += std::ptrdiff_t(cpy.dimension());
+        }
+        std::size_t saxis = static_cast<std::size_t>(axis);
+
+        if (axis < 0 || saxis >= cpy.dimension())
+        {
+            XTENSOR_THROW(std::runtime_error, "axis is not within shape dimension.");
+        }
+
+        const auto axis_dim = static_cast<std::ptrdiff_t>(shape[saxis]);
+        // An empty axis has nothing to roll; returning early also keeps the
+        // shift normalization below from dividing by zero.
+        if (axis_dim == 0)
+        {
+            return cpy;
+        }
+
+        // Normalize the shift into [0, axis_dim) so detail::roll's split
+        // point (dim - shift) is always valid, even for |shift| > axis_dim.
+        shift %= axis_dim;
+        if (shift < 0)
+        {
+            shift += axis_dim;
+        }
+
+        detail::roll(cpy.begin(), e.begin(), shift, saxis, shape, 0);
+        return cpy;
+    }
+
+    /****************************
+     * repeat implementation    *
+     ****************************/
+
+    namespace detail
+    {
+        // Shared implementation of the repeat() overloads: validates that the
+        // number of repetition counts matches the extent of the repeated axis
+        // before building the lazy xrepeat view.
+        template <class E, class R>
+        inline auto make_xrepeat(E&& e, R&& r, typename std::decay_t<E>::size_type axis)
+        {
+            const auto casted_axis = static_cast<typename std::decay_t<E>::size_type>(axis);
+            if (r.size() != e.shape(casted_axis))
+            {
+                XTENSOR_THROW(std::invalid_argument, "repeats must have the same size as the specified axis");
+            }
+            return xrepeat<const_xclosure_t<E>, R>(std::forward<E>(e), std::forward<R>(r), axis);
+        }
+    }
+
+    /**
+     * Repeat elements of an expression along a given axis.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input xexpression
+     * @param repeats The number of repetitions of each element.
+     *     @p repeats is broadcasted to fit the shape of the given @p axis.
+     * @param axis the axis along which to repeat the value
+     * @return an expression which has the same shape as @p e, except along the given @p axis
+     */
+    template <class E>
+    inline auto repeat(E&& e, std::size_t repeats, std::size_t axis)
+    {
+        const auto casted_axis = static_cast<typename std::decay_t<E>::size_type>(axis);
+        // Broadcast the scalar repetition count to one entry per position of
+        // the axis, then delegate to the vector overload.
+        std::vector<std::size_t> broadcasted_repeats(e.shape(casted_axis));
+        std::fill(broadcasted_repeats.begin(), broadcasted_repeats.end(), repeats);
+        return repeat(std::forward<E>(e), std::move(broadcasted_repeats), axis);
+    }
+
+    /**
+     * Repeat elements of an expression along a given axis.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input xexpression
+     * @param repeats The number of repetitions of each element.
+     *     The size of @p repeats must match the shape of the given @p axis.
+     * @param axis the axis along which to repeat the value
+     *
+     * @return an expression which has the same shape as @p e, except along the given @p axis
+     */
+    template <class E>
+    inline auto repeat(E&& e, const std::vector<std::size_t>& repeats, std::size_t axis)
+    {
+        return detail::make_xrepeat(std::forward<E>(e), repeats, axis);
+    }
+
+    /**
+     * Repeat elements of an expression along a given axis.
+     *
+     * @ingroup xt_xmanipulation
+     * @param e the input xexpression
+     * @param repeats The number of repetitions of each element.
+     *     The size of @p repeats must match the shape of the given @p axis.
+     * @param axis the axis along which to repeat the value
+     * @return an expression which has the same shape as @p e, except along the given @p axis
+     */
+    template <class E>
+    inline auto repeat(E&& e, std::vector<std::size_t>&& repeats, std::size_t axis)
+    {
+        return detail::make_xrepeat(std::forward<E>(e), std::move(repeats), axis);
+    }
+}
+
+#endif

+ 676 - 0
3rd/numpy/include/xtensor/xmasked_view.hpp

@@ -0,0 +1,676 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_XMASKED_VIEW_HPP
+#define XTENSOR_XMASKED_VIEW_HPP
+
+#include "xaccessible.hpp"
+#include "xexpression.hpp"
+#include "xiterable.hpp"
+#include "xsemantic.hpp"
+#include "xshape.hpp"
+#include "xtensor_forward.hpp"
+#include "xtl/xmasked_value.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+    /****************************
+     * xmasked_view declaration  *
+     *****************************/
+
+    template <class CTD, class CTM>
+    class xmasked_view;
+
+    template <class D, bool is_const>
+    class xmasked_view_stepper;
+
+    template <class T>
+    struct xcontainer_inner_types;
+
+    // Inner types of xmasked_view: values come from the data expression (CTD),
+    // visibility flags from the mask expression (CTM); element access yields
+    // xtl::xmasked_value combinations of the two.
+    template <class CTD, class CTM>
+    struct xcontainer_inner_types<xmasked_view<CTD, CTM>>
+    {
+        using data_type = std::decay_t<CTD>;
+        using mask_type = std::decay_t<CTM>;
+        using base_value_type = typename data_type::value_type;
+        using flag_type = typename mask_type::value_type;
+        using val_reference = inner_reference_t<CTD>;
+        using mask_reference = inner_reference_t<CTM>;
+        using value_type = xtl::xmasked_value<base_value_type, flag_type>;
+        using reference = xtl::xmasked_value<val_reference, mask_reference>;
+        using const_reference = xtl::xmasked_value<typename data_type::const_reference, typename mask_type::const_reference>;
+        using size_type = typename data_type::size_type;
+        using temporary_type = xarray<xtl::xmasked_value<base_value_type, flag_type>>;
+    };
+
+    // Iteration support for xmasked_view: the steppers advance the data and
+    // mask expressions in lockstep.
+    template <class CTD, class CTM>
+    struct xiterable_inner_types<xmasked_view<CTD, CTM>>
+    {
+        using masked_view_type = xmasked_view<CTD, CTM>;
+        using inner_shape_type = typename std::decay_t<CTD>::inner_shape_type;
+        using stepper = xmasked_view_stepper<masked_view_type, false>;
+        using const_stepper = xmasked_view_stepper<masked_view_type, true>;
+    };
+
+    /**
+     * @class xmasked_view
+     * @brief View on an xoptional_assembly or xoptional_assembly_adaptor
+     * hiding values depending on a given mask.
+     *
+     * The xmasked_view class implements a view on an xoptional_assembly or
+     * xoptional_assembly_adaptor, it takes this xoptional_assembly and a
+     * mask as input. The mask is an xexpression containing boolean values,
+     * whenever the value of the mask is false, the optional value of
+     * xmasked_view is considered missing, otherwise it depends on the
+     * underlying xoptional_assembly.
+     *
+     * @tparam CTD The type of expression holding the values.
+     * @tparam CTM The type of expression holding the mask.
+     */
+    template <class CTD, class CTM>
+    class xmasked_view : public xview_semantic<xmasked_view<CTD, CTM>>,
+                         // Accessibility and iterability are inherited privately;
+                         // only selected members are re-exported with `using`
+                         // declarations below.
+                         private xaccessible<xmasked_view<CTD, CTM>>,
+                         private xiterable<xmasked_view<CTD, CTM>>
+    {
+    public:
+
+        using self_type = xmasked_view<CTD, CTM>;
+        using semantic_base = xview_semantic<xmasked_view<CTD, CTM>>;
+        using accessible_base = xaccessible<self_type>;
+        using inner_types = xcontainer_inner_types<self_type>;
+        using temporary_type = typename inner_types::temporary_type;
+
+        using data_type = typename inner_types::data_type;
+        using mask_type = typename inner_types::mask_type;
+        using value_expression = CTD;
+        using mask_expression = CTM;
+
+        // True when the value expression cannot be modified through this view.
+        static constexpr bool is_data_const = std::is_const<std::remove_reference_t<value_expression>>::value;
+
+        using base_value_type = typename inner_types::base_value_type;
+        using base_reference = typename data_type::reference;
+        using base_const_reference = typename data_type::const_reference;
+
+        using flag_type = typename inner_types::flag_type;
+        using flag_reference = typename mask_type::reference;
+        using flag_const_reference = typename mask_type::const_reference;
+
+        using val_reference = typename inner_types::val_reference;
+        using mask_reference = typename inner_types::mask_reference;
+
+        using value_type = typename inner_types::value_type;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+
+        using pointer = xtl::xclosure_pointer<reference>;
+        using const_pointer = xtl::xclosure_pointer<const_reference>;
+
+        using size_type = typename inner_types::size_type;
+        using difference_type = typename data_type::difference_type;
+
+        using bool_load_type = xtl::xmasked_value<typename data_type::bool_load_type, mask_type>;
+
+        using shape_type = typename data_type::shape_type;
+        using strides_type = typename data_type::strides_type;
+
+        // The view itself is never contiguous: logically adjacent elements
+        // pair values with mask flags from two distinct expressions.
+        static constexpr layout_type static_layout = data_type::static_layout;
+        static constexpr bool contiguous_layout = false;
+
+        using inner_shape_type = typename data_type::inner_shape_type;
+        using inner_strides_type = typename data_type::inner_strides_type;
+        using inner_backstrides_type = typename data_type::inner_backstrides_type;
+
+        using expression_tag = xtensor_expression_tag;
+
+        using iterable_base = xiterable<xmasked_view<CTD, CTM>>;
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+
+        template <layout_type L>
+        using layout_iterator = typename iterable_base::template layout_iterator<L>;
+        template <layout_type L>
+        using const_layout_iterator = typename iterable_base::template const_layout_iterator<L>;
+        template <layout_type L>
+        using reverse_layout_iterator = typename iterable_base::template reverse_layout_iterator<L>;
+        template <layout_type L>
+        using const_reverse_layout_iterator = typename iterable_base::template const_reverse_layout_iterator<L>;
+
+        template <class S, layout_type L>
+        using broadcast_iterator = typename iterable_base::template broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using const_broadcast_iterator = typename iterable_base::template const_broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using reverse_broadcast_iterator = typename iterable_base::template reverse_broadcast_iterator<S, L>;
+        template <class S, layout_type L>
+        using const_reverse_broadcast_iterator = typename iterable_base::template const_reverse_broadcast_iterator<S, L>;
+
+        using iterator = typename iterable_base::iterator;
+        using const_iterator = typename iterable_base::const_iterator;
+        using reverse_iterator = typename iterable_base::reverse_iterator;
+        using const_reverse_iterator = typename iterable_base::const_reverse_iterator;
+
+        template <class D, class M>
+        xmasked_view(D&& data, M&& mask);
+
+        xmasked_view(const xmasked_view&) = default;
+
+        size_type size() const noexcept;
+        const inner_shape_type& shape() const noexcept;
+        const inner_strides_type& strides() const noexcept;
+        const inner_backstrides_type& backstrides() const noexcept;
+        using accessible_base::dimension;
+        using accessible_base::shape;
+
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+
+        template <class T>
+        void fill(const T& value);
+
+        template <class... Args>
+        reference operator()(Args... args);
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+
+        template <class... Args>
+        reference unchecked(Args... args);
+
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+
+        using accessible_base::at;
+        using accessible_base::operator[];
+        using accessible_base::back;
+        using accessible_base::front;
+        using accessible_base::in_bounds;
+        using accessible_base::periodic;
+
+        template <class It>
+        reference element(It first, It last);
+
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        template <class S>
+        bool has_linear_assign(const S& strides) const noexcept;
+
+        data_type& value() noexcept;
+        const data_type& value() const noexcept;
+
+        mask_type& visible() noexcept;
+        const mask_type& visible() const noexcept;
+
+        using iterable_base::begin;
+        using iterable_base::cbegin;
+        using iterable_base::cend;
+        using iterable_base::crbegin;
+        using iterable_base::crend;
+        using iterable_base::end;
+        using iterable_base::rbegin;
+        using iterable_base::rend;
+
+        template <class S>
+        stepper stepper_begin(const S& shape) noexcept;
+        template <class S>
+        stepper stepper_end(const S& shape, layout_type l) noexcept;
+
+        template <class S>
+        const_stepper stepper_begin(const S& shape) const noexcept;
+        template <class S>
+        const_stepper stepper_end(const S& shape, layout_type l) const noexcept;
+
+        self_type& operator=(const self_type& rhs);
+
+        template <class E>
+        self_type& operator=(const xexpression<E>& e);
+
+        template <class E>
+        disable_xexpression<E, self_type>& operator=(const E& e);
+
+    private:
+
+        // Closures on the underlying expressions; depending on CTD/CTM these
+        // are held by value or by reference.
+        CTD m_data;
+        CTM m_mask;
+
+        void assign_temporary_impl(temporary_type&& tmp);
+
+        friend class xiterable<self_type>;
+        friend class xconst_iterable<self_type>;
+        friend class xview_semantic<self_type>;
+        friend class xaccessible<self_type>;
+        friend class xconst_accessible<self_type>;
+    };
+
+    // Stepper for xmasked_view: moves a value stepper and a mask stepper in
+    // lockstep and dereferences to an xtl::xmasked_value built from both.
+    template <class D, bool is_const>
+    class xmasked_view_stepper
+    {
+    public:
+
+        using self_type = xmasked_view_stepper<D, is_const>;
+        using masked_view_type = std::decay_t<D>;
+        using value_type = typename masked_view_type::value_type;
+        using reference = std::
+            conditional_t<is_const, typename masked_view_type::const_reference, typename masked_view_type::reference>;
+        using pointer = std::
+            conditional_t<is_const, typename masked_view_type::const_pointer, typename masked_view_type::pointer>;
+        using size_type = typename masked_view_type::size_type;
+        using difference_type = typename masked_view_type::difference_type;
+        using data_type = typename masked_view_type::data_type;
+        using mask_type = typename masked_view_type::mask_type;
+        using value_stepper = std::conditional_t<is_const, typename data_type::const_stepper, typename data_type::stepper>;
+        using mask_stepper = std::conditional_t<is_const, typename mask_type::const_stepper, typename mask_type::stepper>;
+
+        xmasked_view_stepper(value_stepper vs, mask_stepper fs) noexcept;
+
+
+        void step(size_type dim);
+        void step_back(size_type dim);
+        void step(size_type dim, size_type n);
+        void step_back(size_type dim, size_type n);
+        void reset(size_type dim);
+        void reset_back(size_type dim);
+
+        void to_begin();
+        void to_end(layout_type l);
+
+        reference operator*() const;
+
+    private:
+
+        value_stepper m_vs;
+        mask_stepper m_ms;
+    };
+
+    /*******************************
+     * xmasked_view implementation *
+     *******************************/
+
+    /**
+     * @name Constructors
+     */
+    //@{
+    /**
+     * Creates an xmasked_view, given the xoptional_assembly or
+     * xoptional_assembly_adaptor and the mask
+     *
+     * @param data the underlying xoptional_assembly or xoptional_assembly_adaptor
+     * @param mask the mask.
+     */
+    template <class CTD, class CTM>
+    template <class D, class M>
+    inline xmasked_view<CTD, CTM>::xmasked_view(D&& data, M&& mask)
+        : m_data(std::forward<D>(data))
+        , m_mask(std::forward<M>(mask))
+    {
+    }
+
+    //@}
+
+    /**
+     * @name Size and shape
+     */
+    //@{
+    /**
+     * Returns the number of elements in the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    inline auto xmasked_view<CTD, CTM>::size() const noexcept -> size_type
+    {
+        return m_data.size();
+    }
+
+    /**
+     * Returns the shape of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    inline auto xmasked_view<CTD, CTM>::shape() const noexcept -> const inner_shape_type&
+    {
+        return m_data.shape();
+    }
+
+    /**
+     * Returns the strides of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    inline auto xmasked_view<CTD, CTM>::strides() const noexcept -> const inner_strides_type&
+    {
+        return m_data.strides();
+    }
+
+    /**
+     * Returns the backstrides of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    inline auto xmasked_view<CTD, CTM>::backstrides() const noexcept -> const inner_backstrides_type&
+    {
+        return m_data.backstrides();
+    }
+
+    //@}
+
+    /**
+     * Return the layout_type of the xmasked_view
+     * @return layout_type of the xmasked_view
+     */
+    template <class CTD, class CTM>
+    inline layout_type xmasked_view<CTD, CTM>::layout() const noexcept
+    {
+        return m_data.layout();
+    }
+
+    // The view is never contiguous; see the contiguous_layout constant.
+    template <class CTD, class CTM>
+    inline bool xmasked_view<CTD, CTM>::is_contiguous() const noexcept
+    {
+        return false;
+    }
+
+    /**
+     * Fills the data with the given value.
+     * @param value the value to fill the data with.
+     */
+    template <class CTD, class CTM>
+    template <class T>
+    inline void xmasked_view<CTD, CTM>::fill(const T& value)
+    {
+        std::fill(this->begin(), this->end(), value);
+    }
+
+    /**
+     * @name Data
+     */
+    //@{
+    /**
+     * Returns a reference to the element at the specified position in the xmasked_view.
+     * @param args a list of indices specifying the position in the xmasked_view. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    template <class... Args>
+    inline auto xmasked_view<CTD, CTM>::operator()(Args... args) -> reference
+    {
+        return reference(m_data(args...), m_mask(args...));
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the xmasked_view.
+     * @param args a list of indices specifying the position in the xmasked_view. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    template <class... Args>
+    inline auto xmasked_view<CTD, CTM>::operator()(Args... args) const -> const_reference
+    {
+        return const_reference(m_data(args...), m_mask(args...));
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the xmasked_view.
+     * @param args a list of indices specifying the position in the xmasked_view. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the xmasked_view, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class CTD, class CTM>
+    template <class... Args>
+    inline auto xmasked_view<CTD, CTM>::unchecked(Args... args) -> reference
+    {
+        return reference(m_data.unchecked(args...), m_mask.unchecked(args...));
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the xmasked_view.
+     * @param args a list of indices specifying the position in the xmasked_view. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the xmasked_view, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class CTD, class CTM>
+    template <class... Args>
+    inline auto xmasked_view<CTD, CTM>::unchecked(Args... args) const -> const_reference
+    {
+        return const_reference(m_data.unchecked(args...), m_mask.unchecked(args...));
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the xmasked_view.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater
+     * than the number of dimensions of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    template <class It>
+    inline auto xmasked_view<CTD, CTM>::element(It first, It last) -> reference
+    {
+        return reference(m_data.element(first, last), m_mask.element(first, last));
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the xmasked_view.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater
+     * than the number of dimensions of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    template <class It>
+    inline auto xmasked_view<CTD, CTM>::element(It first, It last) const -> const_reference
+    {
+        return const_reference(m_data.element(first, last), m_mask.element(first, last));
+    }
+
+    template <class CTD, class CTM>
+    template <class S>
+    inline bool xmasked_view<CTD, CTM>::has_linear_assign(const S& strides) const noexcept
+    {
+        return m_data.has_linear_assign(strides) && m_mask.has_linear_assign(strides);
+    }
+
+    /**
+     * Return an expression for the values of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    inline auto xmasked_view<CTD, CTM>::value() noexcept -> data_type&
+    {
+        return m_data;
+    }
+
+    /**
+     * Return a constant expression for the values of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    inline auto xmasked_view<CTD, CTM>::value() const noexcept -> const data_type&
+    {
+        return m_data;
+    }
+
+    /**
+     * Return an expression for the mask of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    inline auto xmasked_view<CTD, CTM>::visible() noexcept -> mask_type&
+    {
+        return m_mask;
+    }
+
+    /**
+     * Return a constant expression for the mask of the xmasked_view.
+     */
+    template <class CTD, class CTM>
+    inline auto xmasked_view<CTD, CTM>::visible() const noexcept -> const mask_type&
+    {
+        return m_mask;
+    }
+
+    // Steppers are built by pairing the steppers of the value and mask
+    // expressions so that both advance together.
+    template <class CTD, class CTM>
+    template <class S>
+    inline auto xmasked_view<CTD, CTM>::stepper_begin(const S& shape) noexcept -> stepper
+    {
+        return stepper(value().stepper_begin(shape), visible().stepper_begin(shape));
+    }
+
+    template <class CTD, class CTM>
+    template <class S>
+    inline auto xmasked_view<CTD, CTM>::stepper_end(const S& shape, layout_type l) noexcept -> stepper
+    {
+        return stepper(value().stepper_end(shape, l), visible().stepper_end(shape, l));
+    }
+
+    template <class CTD, class CTM>
+    template <class S>
+    inline auto xmasked_view<CTD, CTM>::stepper_begin(const S& shape) const noexcept -> const_stepper
+    {
+        return const_stepper(value().stepper_begin(shape), visible().stepper_begin(shape));
+    }
+
+    template <class CTD, class CTM>
+    template <class S>
+    inline auto xmasked_view<CTD, CTM>::stepper_end(const S& shape, layout_type l) const noexcept
+        -> const_stepper
+    {
+        return const_stepper(value().stepper_end(shape, l), visible().stepper_end(shape, l));
+    }
+
+    // Copy assignment goes through a temporary to be safe against aliasing
+    // between this view and rhs.
+    template <class CTD, class CTM>
+    inline auto xmasked_view<CTD, CTM>::operator=(const self_type& rhs) -> self_type&
+    {
+        temporary_type tmp(rhs);
+        return this->assign_temporary(std::move(tmp));
+    }
+
+    template <class CTD, class CTM>
+    template <class E>
+    inline auto xmasked_view<CTD, CTM>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        return semantic_base::operator=(e);
+    }
+
+    // Scalar assignment: broadcast the value to every element of the view.
+    template <class CTD, class CTM>
+    template <class E>
+    inline auto xmasked_view<CTD, CTM>::operator=(const E& e) -> disable_xexpression<E, self_type>&
+    {
+        std::fill(this->begin(), this->end(), e);
+        return *this;
+    }
+
+    // NOTE(review): copies tmp element-wise starting at begin(), which assumes
+    // tmp has the same size/shape as the view — confirm with assign_temporary
+    // call sites.
+    template <class CTD, class CTM>
+    inline void xmasked_view<CTD, CTM>::assign_temporary_impl(temporary_type&& tmp)
+    {
+        std::copy(tmp.cbegin(), tmp.cend(), this->begin());
+    }
+
+    /**
+     * Builds an xmasked_view from a data expression and a mask expression.
+     */
+    template <class CTD, class CTM>
+    inline xmasked_view<CTD, CTM> masked_view(CTD&& data, CTM&& mask)
+    {
+        return xmasked_view<CTD, CTM>(std::forward<CTD>(data), std::forward<CTM>(mask));
+    }
+
+    /***************************************
+     * xmasked_view_stepper implementation *
+     ***************************************/
+
+    // Every motion operation is forwarded to both underlying steppers so that
+    // the value and mask positions stay synchronized.
+    template <class D, bool C>
+    inline xmasked_view_stepper<D, C>::xmasked_view_stepper(value_stepper vs, mask_stepper ms) noexcept
+        : m_vs(vs)
+        , m_ms(ms)
+    {
+    }
+
+    template <class D, bool C>
+    inline void xmasked_view_stepper<D, C>::step(size_type dim)
+    {
+        m_vs.step(dim);
+        m_ms.step(dim);
+    }
+
+    template <class D, bool C>
+    inline void xmasked_view_stepper<D, C>::step_back(size_type dim)
+    {
+        m_vs.step_back(dim);
+        m_ms.step_back(dim);
+    }
+
+    template <class D, bool C>
+    inline void xmasked_view_stepper<D, C>::step(size_type dim, size_type n)
+    {
+        m_vs.step(dim, n);
+        m_ms.step(dim, n);
+    }
+
+    template <class D, bool C>
+    inline void xmasked_view_stepper<D, C>::step_back(size_type dim, size_type n)
+    {
+        m_vs.step_back(dim, n);
+        m_ms.step_back(dim, n);
+    }
+
+    template <class D, bool C>
+    inline void xmasked_view_stepper<D, C>::reset(size_type dim)
+    {
+        m_vs.reset(dim);
+        m_ms.reset(dim);
+    }
+
+    template <class D, bool C>
+    inline void xmasked_view_stepper<D, C>::reset_back(size_type dim)
+    {
+        m_vs.reset_back(dim);
+        m_ms.reset_back(dim);
+    }
+
+    template <class D, bool C>
+    inline void xmasked_view_stepper<D, C>::to_begin()
+    {
+        m_vs.to_begin();
+        m_ms.to_begin();
+    }
+
+    template <class D, bool C>
+    inline void xmasked_view_stepper<D, C>::to_end(layout_type l)
+    {
+        m_vs.to_end(l);
+        m_ms.to_end(l);
+    }
+
+    // Dereference pairs the current value with the current mask flag.
+    template <class D, bool C>
+    inline auto xmasked_view_stepper<D, C>::operator*() const -> reference
+    {
+        return reference(*m_vs, *m_ms);
+    }
+}
+
+#endif

+ 3329 - 0
3rd/numpy/include/xtensor/xmath.hpp

@@ -0,0 +1,3329 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+/**
+ * @brief standard mathematical functions for xexpressions
+ */
+
+#ifndef XTENSOR_MATH_HPP
+#define XTENSOR_MATH_HPP
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <complex>
+#include <type_traits>
+
+#include <xtl/xcomplex.hpp>
+#include <xtl/xsequence.hpp>
+#include <xtl/xtype_traits.hpp>
+
+#include "xaccumulator.hpp"
+#include "xeval.hpp"
+#include "xmanipulation.hpp"
+#include "xoperation.hpp"
+#include "xreducer.hpp"
+#include "xslice.hpp"
+#include "xstrided_view.hpp"
+#include "xtensor_config.hpp"
+
+namespace xt
+{
+    template <class T = double>
+    struct numeric_constants
+    {
+        // Frequently used mathematical constants; the values match the
+        // POSIX <math.h> M_* macros (PI = M_PI, D_1_PI = M_1_PI,
+        // D_2_SQRTPI = M_2_SQRTPI, SQRT1_2 = M_SQRT1_2, ...), expressed
+        // at type T's precision.
+        static constexpr T PI = 3.141592653589793238463;
+        static constexpr T PI_2 = 1.57079632679489661923;
+        static constexpr T PI_4 = 0.785398163397448309616;
+        static constexpr T D_1_PI = 0.318309886183790671538;
+        static constexpr T D_2_PI = 0.636619772367581343076;
+        static constexpr T D_2_SQRTPI = 1.12837916709551257390;
+        static constexpr T SQRT2 = 1.41421356237309504880;
+        static constexpr T SQRT1_2 = 0.707106781186547524401;
+        static constexpr T E = 2.71828182845904523536;
+        static constexpr T LOG2E = 1.44269504088896340736;
+        static constexpr T LOG10E = 0.434294481903251827651;
+        static constexpr T LN2 = 0.693147180559945309417;
+    };
+
+    /***********
+     * Helpers *
+     ***********/
+
+// For unsigned integral types abs is the identity; these overloads make an
+// unqualified abs(x) well-formed for them without ambiguous-overload errors
+// or sign-conversion warnings.
+#define XTENSOR_UNSIGNED_ABS_FUNC(T)   \
+    constexpr inline T abs(const T& x) \
+    {                                  \
+        return x;                      \
+    }
+
+// Defines FUNC_NAME(T) as a constexpr function returning the fixed constant
+// RETURN_VAL; used below to give isinf/isnan/isfinite trivial integer
+// overloads.
+#define XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, T) \
+    constexpr inline bool FUNC_NAME(const T& /*x*/) noexcept      \
+    {                                                             \
+        return RETURN_VAL;                                        \
+    }
+
+// Expands XTENSOR_INT_SPECIALIZATION_IMPL for every standard integer type.
+#define XTENSOR_INT_SPECIALIZATION(FUNC_NAME, RETURN_VAL)                   \
+    XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, char);           \
+    XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, short);          \
+    XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, int);            \
+    XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, long);           \
+    XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, long long);      \
+    XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, unsigned char);  \
+    XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, unsigned short); \
+    XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, unsigned int);   \
+    XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, unsigned long);  \
+    XTENSOR_INT_SPECIALIZATION_IMPL(FUNC_NAME, RETURN_VAL, unsigned long long);
+
+
+// Generates struct NAME_fun with a scalar call operator and a SIMD batch
+// entry point (simd_apply). Both pull math::NAME into scope and call NAME
+// unqualified, so ADL can also pick up overloads for batch/user types.
+#define XTENSOR_UNARY_MATH_FUNCTOR(NAME)              \
+    struct NAME##_fun                                 \
+    {                                                 \
+        template <class T>                            \
+        constexpr auto operator()(const T& arg) const \
+        {                                             \
+            using math::NAME;                         \
+            return NAME(arg);                         \
+        }                                             \
+        template <class B>                            \
+        constexpr auto simd_apply(const B& arg) const \
+        {                                             \
+            using math::NAME;                         \
+            return NAME(arg);                         \
+        }                                             \
+    }
+
+// NOTE(review): this expands to the same body as XTENSOR_UNARY_MATH_FUNCTOR
+// in this file. It is kept as a distinct macro, presumably so the
+// complex-reducing functions (e.g. abs of a complex value, whose result
+// type differs from the input) can diverge -- confirm against upstream
+// xtensor before merging the two.
+#define XTENSOR_UNARY_MATH_FUNCTOR_COMPLEX_REDUCING(NAME) \
+    struct NAME##_fun                                     \
+    {                                                     \
+        template <class T>                                \
+        constexpr auto operator()(const T& arg) const     \
+        {                                                 \
+            using math::NAME;                             \
+            return NAME(arg);                             \
+        }                                                 \
+        template <class B>                                \
+        constexpr auto simd_apply(const B& arg) const     \
+        {                                                 \
+            using math::NAME;                             \
+            return NAME(arg);                             \
+        }                                                 \
+    }
+
+// Binary variant of the functor generator (two arguments).
+#define XTENSOR_BINARY_MATH_FUNCTOR(NAME)                               \
+    struct NAME##_fun                                                   \
+    {                                                                   \
+        template <class T1, class T2>                                   \
+        constexpr auto operator()(const T1& arg1, const T2& arg2) const \
+        {                                                               \
+            using math::NAME;                                           \
+            return NAME(arg1, arg2);                                    \
+        }                                                               \
+        template <class B>                                              \
+        constexpr auto simd_apply(const B& arg1, const B& arg2) const   \
+        {                                                               \
+            using math::NAME;                                           \
+            return NAME(arg1, arg2);                                    \
+        }                                                               \
+    }
+
+// Ternary variant (three arguments). Note that simd_apply is not constexpr
+// here, unlike in the unary/binary variants above.
+#define XTENSOR_TERNARY_MATH_FUNCTOR(NAME)                                              \
+    struct NAME##_fun                                                                   \
+    {                                                                                   \
+        template <class T1, class T2, class T3>                                         \
+        constexpr auto operator()(const T1& arg1, const T2& arg2, const T3& arg3) const \
+        {                                                                               \
+            using math::NAME;                                                           \
+            return NAME(arg1, arg2, arg3);                                              \
+        }                                                                               \
+        template <class B>                                                              \
+        auto simd_apply(const B& arg1, const B& arg2, const B& arg3) const              \
+        {                                                                               \
+            using math::NAME;                                                           \
+            return NAME(arg1, arg2, arg3);                                              \
+        }                                                                               \
+    }
+
+    namespace math
+    {
+        // Pull the <cmath>/<complex> functions into xt::math so that the
+        // functor macros above can call them unqualified; ADL can then also
+        // find overloads for user-defined and SIMD batch types.
+        using std::abs;
+        using std::fabs;
+
+        using std::acos;
+        using std::asin;
+        using std::atan;
+        using std::cos;
+        using std::sin;
+        using std::tan;
+
+        using std::acosh;
+        using std::asinh;
+        using std::atanh;
+        using std::cosh;
+        using std::sinh;
+        using std::tanh;
+
+        using std::cbrt;
+        using std::sqrt;
+
+        using std::exp;
+        using std::exp2;
+        using std::expm1;
+        using std::ilogb;
+        using std::log;
+        using std::log10;
+        using std::log1p;
+        using std::log2;
+        using std::logb;
+
+        using std::ceil;
+        using std::floor;
+        using std::llround;
+        using std::lround;
+        using std::nearbyint;
+        using std::remainder;
+        using std::rint;
+        using std::round;
+        using std::trunc;
+
+        using std::erf;
+        using std::erfc;
+        using std::lgamma;
+        using std::tgamma;
+
+        using std::arg;
+        using std::conj;
+        using std::imag;
+        using std::real;
+
+        using std::atan2;
+
+// copysign is not in the std namespace for MSVC
+#if !defined(_MSC_VER)
+        using std::copysign;
+#endif
+        using std::fdim;
+        using std::fmax;
+        using std::fmin;
+        using std::fmod;
+        using std::hypot;
+        using std::pow;
+
+        using std::fma;
+        using std::fpclassify;
+
+        // Overload isinf, isnan and isfinite because glibc implementation
+        // might return int instead of bool and the SIMD detection requires
+        // bool return type.
+        template <class T>
+        inline std::enable_if_t<xtl::is_arithmetic<T>::value, bool> isinf(const T& t)
+        {
+            return bool(std::isinf(t));
+        }
+
+        template <class T>
+        inline std::enable_if_t<xtl::is_arithmetic<T>::value, bool> isnan(const T& t)
+        {
+            return bool(std::isnan(t));
+        }
+
+        template <class T>
+        inline std::enable_if_t<xtl::is_arithmetic<T>::value, bool> isfinite(const T& t)
+        {
+            return bool(std::isfinite(t));
+        }
+
+        // Overload isinf, isnan and isfinite for complex datatypes,
+        // following the Python specification: a complex value is infinite
+        // (resp. NaN) if either its real or imaginary part is, and finite
+        // only if it is neither infinite nor NaN.
+        template <class T>
+        inline bool isinf(const std::complex<T>& c)
+        {
+            return std::isinf(std::real(c)) || std::isinf(std::imag(c));
+        }
+
+        template <class T>
+        inline bool isnan(const std::complex<T>& c)
+        {
+            return std::isnan(std::real(c)) || std::isnan(std::imag(c));
+        }
+
+        template <class T>
+        inline bool isfinite(const std::complex<T>& c)
+        {
+            return !isinf(c) && !isnan(c);
+        }
+
+        // VS2015 STL defines isnan, isinf and isfinite as template
+        // functions, breaking ADL.
+#if defined(_WIN32) && defined(XTENSOR_USE_XSIMD)
+        /*template <class T, class A>
+        inline xsimd::batch_bool<T, A> isinf(const xsimd::batch<T, A>& b)
+        {
+            return xsimd::isinf(b);
+        }
+        template <class T, class A>
+        inline xsimd::batch_bool<T, A> isnan(const xsimd::batch<T, A>& b)
+        {
+            return xsimd::isnan(b);
+        }
+        template <class T, class A>
+        inline xsimd::batch_bool<T, A> isfinite(const xsimd::batch<T, A>& b)
+        {
+            return xsimd::isfinite(b);
+        }*/
+#endif
+        // The following specializations are needed to avoid 'ambiguous overload'
+        // errors, because 'unsigned char' and 'unsigned short' are automatically
+        // promoted to 'int'. We still add these functions to silence warnings.
+        XTENSOR_UNSIGNED_ABS_FUNC(unsigned char)
+        XTENSOR_UNSIGNED_ABS_FUNC(unsigned short)
+        XTENSOR_UNSIGNED_ABS_FUNC(unsigned int)
+        XTENSOR_UNSIGNED_ABS_FUNC(unsigned long)
+        XTENSOR_UNSIGNED_ABS_FUNC(unsigned long long)
+
+#ifdef _WIN32
+        // Integral values are never infinite/NaN and are always finite.
+        XTENSOR_INT_SPECIALIZATION(isinf, false);
+        XTENSOR_INT_SPECIALIZATION(isnan, false);
+        XTENSOR_INT_SPECIALIZATION(isfinite, true);
+#endif
+
+        // Instantiate one functor struct (NAME_fun) per math function.
+        XTENSOR_UNARY_MATH_FUNCTOR_COMPLEX_REDUCING(abs);
+
+        XTENSOR_UNARY_MATH_FUNCTOR(fabs);
+        XTENSOR_BINARY_MATH_FUNCTOR(fmod);
+        XTENSOR_BINARY_MATH_FUNCTOR(remainder);
+        XTENSOR_TERNARY_MATH_FUNCTOR(fma);
+        XTENSOR_BINARY_MATH_FUNCTOR(fmax);
+        XTENSOR_BINARY_MATH_FUNCTOR(fmin);
+        XTENSOR_BINARY_MATH_FUNCTOR(fdim);
+        XTENSOR_UNARY_MATH_FUNCTOR(exp);
+        XTENSOR_UNARY_MATH_FUNCTOR(exp2);
+        XTENSOR_UNARY_MATH_FUNCTOR(expm1);
+        XTENSOR_UNARY_MATH_FUNCTOR(log);
+        XTENSOR_UNARY_MATH_FUNCTOR(log10);
+        XTENSOR_UNARY_MATH_FUNCTOR(log2);
+        XTENSOR_UNARY_MATH_FUNCTOR(log1p);
+        XTENSOR_BINARY_MATH_FUNCTOR(pow);
+        XTENSOR_UNARY_MATH_FUNCTOR(sqrt);
+        XTENSOR_UNARY_MATH_FUNCTOR(cbrt);
+        XTENSOR_BINARY_MATH_FUNCTOR(hypot);
+        XTENSOR_UNARY_MATH_FUNCTOR(sin);
+        XTENSOR_UNARY_MATH_FUNCTOR(cos);
+        XTENSOR_UNARY_MATH_FUNCTOR(tan);
+        XTENSOR_UNARY_MATH_FUNCTOR(asin);
+        XTENSOR_UNARY_MATH_FUNCTOR(acos);
+        XTENSOR_UNARY_MATH_FUNCTOR(atan);
+        XTENSOR_BINARY_MATH_FUNCTOR(atan2);
+        XTENSOR_UNARY_MATH_FUNCTOR(sinh);
+        XTENSOR_UNARY_MATH_FUNCTOR(cosh);
+        XTENSOR_UNARY_MATH_FUNCTOR(tanh);
+        XTENSOR_UNARY_MATH_FUNCTOR(asinh);
+        XTENSOR_UNARY_MATH_FUNCTOR(acosh);
+        XTENSOR_UNARY_MATH_FUNCTOR(atanh);
+        XTENSOR_UNARY_MATH_FUNCTOR(erf);
+        XTENSOR_UNARY_MATH_FUNCTOR(erfc);
+        XTENSOR_UNARY_MATH_FUNCTOR(tgamma);
+        XTENSOR_UNARY_MATH_FUNCTOR(lgamma);
+        XTENSOR_UNARY_MATH_FUNCTOR(ceil);
+        XTENSOR_UNARY_MATH_FUNCTOR(floor);
+        XTENSOR_UNARY_MATH_FUNCTOR(trunc);
+        XTENSOR_UNARY_MATH_FUNCTOR(round);
+        XTENSOR_UNARY_MATH_FUNCTOR(nearbyint);
+        XTENSOR_UNARY_MATH_FUNCTOR(rint);
+        XTENSOR_UNARY_MATH_FUNCTOR(isfinite);
+        XTENSOR_UNARY_MATH_FUNCTOR(isinf);
+        XTENSOR_UNARY_MATH_FUNCTOR(isnan);
+    }
+
+#undef XTENSOR_UNARY_MATH_FUNCTOR
+#undef XTENSOR_BINARY_MATH_FUNCTOR
+#undef XTENSOR_TERNARY_MATH_FUNCTOR
+#undef XTENSOR_UNARY_MATH_FUNCTOR_COMPLEX_REDUCING
+#undef XTENSOR_UNSIGNED_ABS_FUNC
+
+    namespace detail
+    {
+        // Materialize a reducer's initial value as type R.
+        // Overload for scalar-like targets (no iterator interface): the
+        // result is constructed directly from the supplied value.
+        template <class R, class T>
+        std::enable_if_t<!has_iterator_interface<R>::value, R> fill_init(T init)
+        {
+            return R(init);
+        }
+
+        // Overload for iterable targets: default-construct the result and
+        // assign the supplied value to every element.
+        template <class R, class T>
+        std::enable_if_t<has_iterator_interface<R>::value, R> fill_init(T init)
+        {
+            R filled;
+            std::fill(std::begin(filled), std::end(filled), init);
+            return filled;
+        }
+    }
+
+// Generates the full family of NAME(e [, axes] [, es]) reducer overloads:
+//  - axes given as a container (first overload),
+//  - a single integral axis, wrapped into a one-element list,
+//  - no axes at all (reduce over every axis),
+//  - axes given as a braced initializer list (C-array overload).
+// The optional leading template parameter T overrides the init/result value
+// type (INIT_VALUE_TYPE by default); INIT is materialized through
+// detail::fill_init so scalar and container init types both work.
+#define XTENSOR_REDUCER_FUNCTION(NAME, FUNCTOR, INIT_VALUE_TYPE, INIT)                                               \
+    template <                                                                                                       \
+        class T = void,                                                                                              \
+        class E,                                                                                                     \
+        class X,                                                                                                     \
+        class EVS = DEFAULT_STRATEGY_REDUCERS,                                                                       \
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::negation<xtl::is_integral<std::decay_t<X>>>)>        \
+    inline auto NAME(E&& e, X&& axes, EVS es = EVS())                                                                \
+    {                                                                                                                \
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, INIT_VALUE_TYPE, T>;                \
+        using functor_type = FUNCTOR;                                                                                \
+        using init_value_fct = xt::const_value<init_value_type>;                                                     \
+        return xt::reduce(                                                                                           \
+            make_xreducer_functor(functor_type(), init_value_fct(detail::fill_init<init_value_type>(INIT))),         \
+            std::forward<E>(e),                                                                                      \
+            std::forward<X>(axes),                                                                                   \
+            es                                                                                                       \
+        );                                                                                                           \
+    }                                                                                                                \
+                                                                                                                     \
+    template <                                                                                                       \
+        class T = void,                                                                                              \
+        class E,                                                                                                     \
+        class X,                                                                                                     \
+        class EVS = DEFAULT_STRATEGY_REDUCERS,                                                                       \
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::is_integral<std::decay_t<X>>)>                       \
+    inline auto NAME(E&& e, X axis, EVS es = EVS())                                                                  \
+    {                                                                                                                \
+        return NAME(std::forward<E>(e), {axis}, es);                                                                 \
+    }                                                                                                                \
+                                                                                                                     \
+    template <class T = void, class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)> \
+    inline auto NAME(E&& e, EVS es = EVS())                                                                          \
+    {                                                                                                                \
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, INIT_VALUE_TYPE, T>;                \
+        using functor_type = FUNCTOR;                                                                                \
+        using init_value_fct = xt::const_value<init_value_type>;                                                     \
+        return xt::reduce(                                                                                           \
+            make_xreducer_functor(functor_type(), init_value_fct(detail::fill_init<init_value_type>(INIT))),         \
+            std::forward<E>(e),                                                                                      \
+            es                                                                                                       \
+        );                                                                                                           \
+    }                                                                                                                \
+                                                                                                                     \
+    template <class T = void, class E, class I, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>                \
+    inline auto NAME(E&& e, const I(&axes)[N], EVS es = EVS())                                                       \
+    {                                                                                                                \
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, INIT_VALUE_TYPE, T>;                \
+        using functor_type = FUNCTOR;                                                                                \
+        using init_value_fct = xt::const_value<init_value_type>;                                                     \
+        return xt::reduce(                                                                                           \
+            make_xreducer_functor(functor_type(), init_value_fct(detail::fill_init<init_value_type>(INIT))),         \
+            std::forward<E>(e),                                                                                      \
+            axes,                                                                                                    \
+            es                                                                                                       \
+        );                                                                                                           \
+    }
+
+    /*******************
+     * basic functions *
+     *******************/
+
+    /**
+     * @defgroup basic_functions Basic functions
+     */
+
+    /**
+     * @ingroup basic_functions
+     * @brief Absolute value function.
+     *
+     * Returns an \ref xfunction for the element-wise absolute value
+     * of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto abs(E&& e) noexcept -> detail::xfunction_type_t<math::abs_fun, E>
+    {
+        return detail::make_xfunction<math::abs_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Absolute value function.
+     *
+     * Returns an \ref xfunction for the element-wise absolute value
+     * of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     * @note Dispatches to \c std::fabs, whereas \ref abs dispatches to
+     * \c math::abs (which also provides identity overloads for unsigned
+     * integral types).
+     */
+    template <class E>
+    inline auto fabs(E&& e) noexcept -> detail::xfunction_type_t<math::fabs_fun, E>
+    {
+        return detail::make_xfunction<math::fabs_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Remainder of the floating point division operation.
+     *
+     * Returns an \ref xfunction for the element-wise remainder of
+     * the floating point division operation <em>e1 / e2</em>.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     * @note e1 and e2 can't both be scalars.
+     */
+    template <class E1, class E2>
+    inline auto fmod(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<math::fmod_fun, E1, E2>
+    {
+        return detail::make_xfunction<math::fmod_fun>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Signed remainder of the division operation.
+     *
+     * Returns an \ref xfunction for the element-wise signed remainder
+     * of the floating point division operation <em>e1 / e2</em>.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     * @note e1 and e2 can't both be scalars.
+     */
+    template <class E1, class E2>
+    inline auto remainder(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<math::remainder_fun, E1, E2>
+    {
+        return detail::make_xfunction<math::remainder_fun>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Fused multiply-add operation.
+     *
+     * Returns an \ref xfunction for <em>e1 * e2 + e3</em> as if
+     * to infinite precision and rounded only once to fit the result type.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @param e3 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     * @note e1, e2 and e3 can't all be scalars.
+     */
+    template <class E1, class E2, class E3>
+    inline auto fma(E1&& e1, E2&& e2, E3&& e3) noexcept -> detail::xfunction_type_t<math::fma_fun, E1, E2, E3>
+    {
+        return detail::make_xfunction<math::fma_fun>(
+            std::forward<E1>(e1),
+            std::forward<E2>(e2),
+            std::forward<E3>(e3)
+        );
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Maximum function.
+     *
+     * Returns an \ref xfunction for the element-wise maximum
+     * of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     * @note e1 and e2 can't both be scalars.
+     */
+    template <class E1, class E2>
+    inline auto fmax(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<math::fmax_fun, E1, E2>
+    {
+        return detail::make_xfunction<math::fmax_fun>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Minimum function.
+     *
+     * Returns an \ref xfunction for the element-wise minimum
+     * of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     * @note e1 and e2 can't both be scalars.
+     */
+    template <class E1, class E2>
+    inline auto fmin(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<math::fmin_fun, E1, E2>
+    {
+        return detail::make_xfunction<math::fmin_fun>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Positive difference function.
+     *
+     * Returns an \ref xfunction for the element-wise positive
+     * difference of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     * @note e1 and e2 can't both be scalars.
+     */
+    template <class E1, class E2>
+    inline auto fdim(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<math::fdim_fun, E1, E2>
+    {
+        return detail::make_xfunction<math::fdim_fun>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    namespace math
+    {
+        // Element-wise minimum by comparison: selects t1 where t1 < t2,
+        // otherwise t2 (scalar path via xtl::select, batch path via
+        // xt_simd::select).
+        template <class T = void>
+        struct minimum
+        {
+            template <class A1, class A2>
+            constexpr auto operator()(const A1& t1, const A2& t2) const noexcept
+            {
+                return xtl::select(t1 < t2, t1, t2);
+            }
+
+            template <class A1, class A2>
+            constexpr auto simd_apply(const A1& t1, const A2& t2) const noexcept
+            {
+                return xt_simd::select(t1 < t2, t1, t2);
+            }
+        };
+
+        // Element-wise maximum by comparison: selects t1 where t1 > t2,
+        // otherwise t2.
+        template <class T = void>
+        struct maximum
+        {
+            template <class A1, class A2>
+            constexpr auto operator()(const A1& t1, const A2& t2) const noexcept
+            {
+                return xtl::select(t1 > t2, t1, t2);
+            }
+
+            template <class A1, class A2>
+            constexpr auto simd_apply(const A1& t1, const A2& t2) const noexcept
+            {
+                return xt_simd::select(t1 > t2, t1, t2);
+            }
+        };
+
+        // Clamp v into [lo, hi]: lo where v < lo, hi where hi < v,
+        // v otherwise.
+        struct clamp_fun
+        {
+            template <class A1, class A2, class A3>
+            constexpr auto operator()(const A1& v, const A2& lo, const A3& hi) const
+            {
+                return xtl::select(v < lo, lo, xtl::select(hi < v, hi, v));
+            }
+
+            template <class A1, class A2, class A3>
+            constexpr auto simd_apply(const A1& v, const A2& lo, const A3& hi) const
+            {
+                return xt_simd::select(v < lo, lo, xt_simd::select(hi < v, hi, v));
+            }
+        };
+
+        // Degrees -> radians (a * PI / 180); integral inputs are computed
+        // at double precision, floating-point inputs at their own precision.
+        struct deg2rad
+        {
+            template <class A, std::enable_if_t<xtl::is_integral<A>::value, int> = 0>
+            constexpr double operator()(const A& a) const noexcept
+            {
+                return a * xt::numeric_constants<double>::PI / 180.0;
+            }
+
+            template <class A, std::enable_if_t<std::is_floating_point<A>::value, int> = 0>
+            constexpr auto operator()(const A& a) const noexcept
+            {
+                return a * xt::numeric_constants<A>::PI / A(180.0);
+            }
+
+            template <class A, std::enable_if_t<xtl::is_integral<A>::value, int> = 0>
+            constexpr double simd_apply(const A& a) const noexcept
+            {
+                return a * xt::numeric_constants<double>::PI / 180.0;
+            }
+
+            template <class A, std::enable_if_t<std::is_floating_point<A>::value, int> = 0>
+            constexpr auto simd_apply(const A& a) const noexcept
+            {
+                return a * xt::numeric_constants<A>::PI / A(180.0);
+            }
+        };
+
+        // Radians -> degrees (a * 180 / PI); same promotion rules as
+        // deg2rad.
+        struct rad2deg
+        {
+            template <class A, std::enable_if_t<xtl::is_integral<A>::value, int> = 0>
+            constexpr double operator()(const A& a) const noexcept
+            {
+                return a * 180.0 / xt::numeric_constants<double>::PI;
+            }
+
+            template <class A, std::enable_if_t<std::is_floating_point<A>::value, int> = 0>
+            constexpr auto operator()(const A& a) const noexcept
+            {
+                return a * A(180.0) / xt::numeric_constants<A>::PI;
+            }
+
+            template <class A, std::enable_if_t<xtl::is_integral<A>::value, int> = 0>
+            constexpr double simd_apply(const A& a) const noexcept
+            {
+                return a * 180.0 / xt::numeric_constants<double>::PI;
+            }
+
+            template <class A, std::enable_if_t<std::is_floating_point<A>::value, int> = 0>
+            constexpr auto simd_apply(const A& a) const noexcept
+            {
+                return a * A(180.0) / xt::numeric_constants<A>::PI;
+            }
+        };
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Convert angles from degrees to radians.
+     *
+     * Returns an \ref xfunction for the element-wise corresponding
+     * angle in radians of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto deg2rad(E&& e) noexcept -> detail::xfunction_type_t<math::deg2rad, E>
+    {
+        return detail::make_xfunction<math::deg2rad>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Convert angles from degrees to radians.
+     *
+     * Returns an \ref xfunction for the element-wise corresponding
+     * angle in radians of \em e. This function is an alias of
+     * \ref deg2rad.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto radians(E&& e) noexcept -> detail::xfunction_type_t<math::deg2rad, E>
+    {
+        return detail::make_xfunction<math::deg2rad>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Convert angles from radians to degrees.
+     *
+     * Returns an \ref xfunction for the element-wise corresponding
+     * angle in degrees of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto rad2deg(E&& e) noexcept -> detail::xfunction_type_t<math::rad2deg, E>
+    {
+        return detail::make_xfunction<math::rad2deg>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Convert angles from radians to degrees.
+     *
+     * Returns an \ref xfunction for the element-wise corresponding
+     * angle in degrees of \em e. This function is an alias of
+     * \ref rad2deg.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto degrees(E&& e) noexcept -> detail::xfunction_type_t<math::rad2deg, E>
+    {
+        return detail::make_xfunction<math::rad2deg>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Elementwise maximum
+     *
+     * Returns an \ref xfunction for the element-wise
+     * maximum between e1 and e2.
+     * @param e1 an \ref xexpression
+     * @param e2 an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto maximum(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<math::maximum<void>, E1, E2>
+    {
+        return detail::make_xfunction<math::maximum<void>>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Elementwise minimum
+     *
+     * Returns an \ref xfunction for the element-wise
+     * minimum between e1 and e2.
+     * @param e1 an \ref xexpression
+     * @param e2 an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto minimum(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<math::minimum<void>, E1, E2>
+    {
+        return detail::make_xfunction<math::minimum<void>>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Maximum element along given axis.
+     *
+     * Returns an \ref xreducer for the maximum of elements over given
+     * \em axes.
+     * @param e an \ref xexpression
+     * @param axes the axes along which the maximum is found (optional)
+     * @param es evaluation strategy of the reducer
+     * @return an \ref xreducer
+     */
+    XTENSOR_REDUCER_FUNCTION(
+        amax,
+        math::maximum<void>,
+        typename std::decay_t<E>::value_type,
+        std::numeric_limits<xvalue_type_t<std::decay_t<E>>>::lowest()
+    )
+
+    /**
+     * @ingroup basic_functions
+     * @brief Minimum element along given axis.
+     *
+     * Returns an \ref xreducer for the minimum of elements over given
+     * \em axes.
+     * @param e an \ref xexpression
+     * @param axes the axes along which the minimum is found (optional)
+     * @param es evaluation strategy of the reducer
+     * @return an \ref xreducer
+     */
+    XTENSOR_REDUCER_FUNCTION(
+        amin,
+        math::minimum<void>,
+        typename std::decay_t<E>::value_type,
+        std::numeric_limits<xvalue_type_t<std::decay_t<E>>>::max()
+    )
+
+    /**
+     * @ingroup basic_functions
+     * @brief Clip values between hi and lo
+     *
+     * Returns an \ref xfunction for the element-wise clipped
+     * values between lo and hi
+     * @param e1 an \ref xexpression or a scalar
+     * @param lo a scalar
+     * @param hi a scalar
+     *
+     * @return a \ref xfunction
+     */
+    template <class E1, class E2, class E3>
+    inline auto clip(E1&& e1, E2&& lo, E3&& hi) noexcept
+        -> detail::xfunction_type_t<math::clamp_fun, E1, E2, E3>
+    {
+        return detail::make_xfunction<math::clamp_fun>(
+            std::forward<E1>(e1),
+            std::forward<E2>(lo),
+            std::forward<E3>(hi)
+        );
+    }
+
    namespace math
    {
        // numpy-compatible sign semantics, dispatched on the scalar type via
        // SFINAE. The "XT = T" indirection keeps each enable_if condition
        // dependent, so exactly one run() overload is viable per T.
        template <class T>
        struct sign_impl
        {
            // Signed arithmetic types: NaN propagates; otherwise +/-1, or a
            // signed zero for x == 0 (copysign preserves the sign of x).
            template <class XT = T>
            static constexpr std::enable_if_t<xtl::is_signed<XT>::value, T> run(T x)
            {
                return std::isnan(x) ? std::numeric_limits<T>::quiet_NaN()
                       : x == 0      ? T(copysign(T(0), x))
                                     : T(copysign(T(1), x));
            }

            // Complex types: sign of the real part; when the real part is
            // zero, the sign of the imaginary part is used instead. The
            // result always has a zero imaginary component.
            template <class XT = T>
            static constexpr std::enable_if_t<xtl::is_complex<XT>::value, T> run(T x)
            {
                return T(
                    sign_impl<typename T::value_type>::run(
                        (x.real() != typename T::value_type(0)) ? x.real() : x.imag()
                    ),
                    0
                );
            }

            // Unsigned types: only 0 and 1 are possible results.
            template <class XT = T>
            static constexpr std::enable_if_t<std::is_unsigned<XT>::value, T> run(T x)
            {
                return T(x > T(0));
            }
        };

        // Element-wise functor wrapper around sign_impl, used by xt::sign.
        struct sign_fun
        {
            template <class T>
            constexpr auto operator()(const T& x) const
            {
                return sign_impl<T>::run(x);
            }
        };
    }

    /**
     * @ingroup basic_functions
     * @brief Returns an element-wise indication of the sign of a number
     *
     * If the number is positive, returns +1. If negative, -1. If the number
     * is zero, returns 0.
     *
     * @param e an \ref xexpression
     * @return an \ref xfunction
     */
    template <class E>
    inline auto sign(E&& e) noexcept -> detail::xfunction_type_t<math::sign_fun, E>
    {
        return detail::make_xfunction<math::sign_fun>(std::forward<E>(e));
    }
+
+    /*************************
+     * exponential functions *
+     *************************/
+
+    /**
+     * @defgroup exp_functions Exponential functions
+     */
+
+    /**
+     * @ingroup exp_functions
+     * @brief Natural exponential function.
+     *
+     * Returns an \ref xfunction for the element-wise natural
+     * exponential of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto exp(E&& e) noexcept -> detail::xfunction_type_t<math::exp_fun, E>
+    {
+        return detail::make_xfunction<math::exp_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup exp_functions
+     * @brief Base 2 exponential function.
+     *
+     * Returns an \ref xfunction for the element-wise base 2
+     * exponential of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto exp2(E&& e) noexcept -> detail::xfunction_type_t<math::exp2_fun, E>
+    {
+        return detail::make_xfunction<math::exp2_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup exp_functions
+     * @brief Natural exponential minus one function.
+     *
+     * Returns an \ref xfunction for the element-wise natural
+     * exponential of \em e, minus 1.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto expm1(E&& e) noexcept -> detail::xfunction_type_t<math::expm1_fun, E>
+    {
+        return detail::make_xfunction<math::expm1_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup exp_functions
+     * @brief Natural logarithm function.
+     *
+     * Returns an \ref xfunction for the element-wise natural
+     * logarithm of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto log(E&& e) noexcept -> detail::xfunction_type_t<math::log_fun, E>
+    {
+        return detail::make_xfunction<math::log_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup exp_functions
+     * @brief Base 10 logarithm function.
+     *
+     * Returns an \ref xfunction for the element-wise base 10
+     * logarithm of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto log10(E&& e) noexcept -> detail::xfunction_type_t<math::log10_fun, E>
+    {
+        return detail::make_xfunction<math::log10_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup exp_functions
+     * @brief Base 2 logarithm function.
+     *
+     * Returns an \ref xfunction for the element-wise base 2
+     * logarithm of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto log2(E&& e) noexcept -> detail::xfunction_type_t<math::log2_fun, E>
+    {
+        return detail::make_xfunction<math::log2_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup exp_functions
+     * @brief Natural logarithm of one plus function.
+     *
+     * Returns an \ref xfunction for the element-wise natural
+     * logarithm of \em e, plus 1.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto log1p(E&& e) noexcept -> detail::xfunction_type_t<math::log1p_fun, E>
+    {
+        return detail::make_xfunction<math::log1p_fun>(std::forward<E>(e));
+    }
+
+    /*******************
+     * power functions *
+     *******************/
+
+    /**
+     * @defgroup pow_functions Power functions
+     */
+
+    /**
+     * @ingroup pow_functions
+     * @brief Power function.
+     *
+     * Returns an \ref xfunction for the element-wise value
+     * of \em e1 raised to the power \em e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     * @note e1 and e2 can't be both scalars.
+     */
+    template <class E1, class E2>
+    inline auto pow(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<math::pow_fun, E1, E2>
+    {
+        return detail::make_xfunction<math::pow_fun>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
    namespace detail
    {
        // Detection idiom: supports<F(T...)> derives from std::true_type
        // when F is callable with arguments of types T..., and from
        // std::false_type otherwise (the variadic overload is the fallback).
        template <class F, class... T, typename = decltype(std::declval<F>()(std::declval<T>()...))>
        std::true_type supports_test(const F&, const T&...);
        std::false_type supports_test(...);

        template <class... T>
        struct supports;

        template <class F, class... T>
        struct supports<F(T...)> : decltype(supports_test(std::declval<F>(), std::declval<T>()...))
        {
        };

        // Adapts a user lambda to the functor interface expected by
        // xfunction. simd_apply participates in overload resolution only
        // when the lambda itself is callable with the simd batch types
        // (checked through `supports`), which is how generic `auto` lambdas
        // with a trailing return type get vectorized transparently.
        template <class F>
        struct lambda_adapt
        {
            explicit lambda_adapt(F&& lmbd)
                : m_lambda(std::move(lmbd))
            {
            }

            template <class... T>
            auto operator()(T... args) const
            {
                return m_lambda(args...);
            }

            template <class... T, XTL_REQUIRES(detail::supports<F(T...)>)>
            auto simd_apply(T... args) const
            {
                return m_lambda(args...);
            }

            F m_lambda;
        };
    }
+
+    /**
+     * Create a xfunction from a lambda
+     *
+     * This function can be used to easily create performant xfunctions from lambdas:
+     *
+     * @code{cpp}
+     * template <class E1>
+     * inline auto square(E1&& e1) noexcept
+     * {
+     *     auto fnct = [](auto x) -> decltype(x * x) {
+     *         return x * x;
+     *     };
+     *     return make_lambda_xfunction(std::move(fnct), std::forward<E1>(e1));
+     * }
+     * @endcode
+     *
+     * Lambda functions allow the reuse of a single argument in multiple places (otherwise
+     * only correctly possible when using xshared_expressions). ``auto`` lambda functions are
+     * automatically vectorized with ``xsimd`` if possible (note that the trailing
+     * ``-> decltype(...)`` is mandatory for the feature detection to work).
+     *
+     * @param lambda the lambda to be vectorized
+     * @param args forwarded arguments
+     *
+     * @return lazy xfunction
+     */
    template <class F, class... E>
    inline auto make_lambda_xfunction(F&& lambda, E&&... args)
    {
        // Wrap the lambda in lambda_adapt so it exposes the functor
        // interface (operator() and, when supported, simd_apply) that
        // xfunction requires.
        using xfunction_type = typename detail::xfunction_type<detail::lambda_adapt<F>, E...>::type;
        return xfunction_type(detail::lambda_adapt<F>(std::forward<F>(lambda)), std::forward<E>(args)...);
    }
+
#define XTENSOR_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)

// Workaround for MSVC 2015 & GCC 4.9: these compilers mishandle the generic
// lambdas used by square()/cube(), so named functors are provided instead.
// BUGFIX: the condition previously tested the never-defined macro
// GCC_VERSION (which the preprocessor evaluates as 0), so the workaround was
// unconditionally enabled on *every* GCC. The macro defined just above is
// XTENSOR_GCC_VERSION; test that one so only GCC < 5 takes the slow path.
#if (defined(_MSC_VER) && _MSC_VER < 1910) || (defined(__GNUC__) && XTENSOR_GCC_VERSION < 49999)
#define XTENSOR_DISABLE_LAMBDA_FCT
#endif

#ifdef XTENSOR_DISABLE_LAMBDA_FCT
    // Named replacement for the `x * x` lambda in square().
    struct square_fct
    {
        template <class T>
        auto operator()(T x) const -> decltype(x * x)
        {
            return x * x;
        }
    };

    // Named replacement for the `x * x * x` lambda in cube().
    struct cube_fct
    {
        template <class T>
        auto operator()(T x) const -> decltype(x * x * x)
        {
            return x * x * x;
        }
    };
#endif
+
+    /**
+     * @ingroup pow_functions
+     * @brief Square power function, equivalent to e1 * e1.
+     *
+     * Returns an \ref xfunction for the element-wise value
+     * of \em e1 * \em e1.
+     * @param e1 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1>
+    inline auto square(E1&& e1) noexcept
+    {
+#ifdef XTENSOR_DISABLE_LAMBDA_FCT
+        return make_lambda_xfunction(square_fct{}, std::forward<E1>(e1));
+#else
+        auto fnct = [](auto x) -> decltype(x * x)
+        {
+            return x * x;
+        };
+        return make_lambda_xfunction(std::move(fnct), std::forward<E1>(e1));
+#endif
+    }
+
+    /**
+     * @ingroup pow_functions
+     * @brief Cube power function, equivalent to e1 * e1 * e1.
+     *
+     * Returns an \ref xfunction for the element-wise value
+     * of \em e1 * \em e1 * \em e1.
+     * @param e1 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1>
+    inline auto cube(E1&& e1) noexcept
+    {
+#ifdef XTENSOR_DISABLE_LAMBDA_FCT
+        return make_lambda_xfunction(cube_fct{}, std::forward<E1>(e1));
+#else
+        auto fnct = [](auto x) -> decltype(x * x * x)
+        {
+            return x * x * x;
+        };
+        return make_lambda_xfunction(std::move(fnct), std::forward<E1>(e1));
+#endif
+    }
+
+#undef XTENSOR_GCC_VERSION
+#undef XTENSOR_DISABLE_LAMBDA_FCT
+
+    namespace detail
+    {
+        // Thanks to Matt Pharr in http://pbrt.org/hair.pdf
+        template <std::size_t N>
+        struct pow_impl;
+
+        template <std::size_t N>
+        struct pow_impl
+        {
+            template <class T>
+            auto operator()(T v) const -> decltype(v * v)
+            {
+                T temp = pow_impl<N / 2>{}(v);
+                return temp * temp * pow_impl<N & 1>{}(v);
+            }
+        };
+
+        template <>
+        struct pow_impl<1>
+        {
+            template <class T>
+            auto operator()(T v) const -> T
+            {
+                return v;
+            }
+        };
+
+        template <>
+        struct pow_impl<0>
+        {
+            template <class T>
+            auto operator()(T /*v*/) const -> T
+            {
+                return T(1);
+            }
+        };
+    }
+
+    /**
+     * @ingroup pow_functions
+     * @brief Integer power function.
+     *
+     * Returns an \ref xfunction for the element-wise power of e1 to
+     * an integral constant.
+     *
+     * Instead of computing the power by using the (expensive) logarithm, this function
+     * computes the power in a number of straight-forward multiplication steps. This function
+     * is therefore much faster (even for high N) than the generic pow-function.
+     *
+     * For example, `e1^20` can be expressed as `(((e1^2)^2)^2)^2*(e1^2)^2`, which is just 5 multiplications.
+     *
+     * @param e an \ref xexpression
+     * @tparam N the exponent (has to be positive integer)
+     * @return an \ref xfunction
+     */
+    template <std::size_t N, class E>
+    inline auto pow(E&& e) noexcept
+    {
+        static_assert(N > 0, "integer power cannot be negative");
+        return make_lambda_xfunction(detail::pow_impl<N>{}, std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup pow_functions
+     * @brief Square root function.
+     *
+     * Returns an \ref xfunction for the element-wise square
+     * root of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto sqrt(E&& e) noexcept -> detail::xfunction_type_t<math::sqrt_fun, E>
+    {
+        return detail::make_xfunction<math::sqrt_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup pow_functions
+     * @brief Cubic root function.
+     *
+     * Returns an \ref xfunction for the element-wise cubic
+     * root of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto cbrt(E&& e) noexcept -> detail::xfunction_type_t<math::cbrt_fun, E>
+    {
+        return detail::make_xfunction<math::cbrt_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup pow_functions
+     * @brief Hypotenuse function.
+     *
+     * Returns an \ref xfunction for the element-wise square
+     * root of the sum of the square of \em e1 and \em e2, avoiding
+     * overflow and underflow at intermediate stages of computation.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     * @note e1 and e2 can't be both scalars.
+     */
+    template <class E1, class E2>
+    inline auto hypot(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<math::hypot_fun, E1, E2>
+    {
+        return detail::make_xfunction<math::hypot_fun>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /***************************
+     * trigonometric functions *
+     ***************************/
+
+    /**
+     * @defgroup trigo_functions Trigonometric functions
+     */
+
+    /**
+     * @ingroup trigo_functions
+     * @brief Sine function.
+     *
+     * Returns an \ref xfunction for the element-wise sine
+     * of \em e (measured in radians).
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto sin(E&& e) noexcept -> detail::xfunction_type_t<math::sin_fun, E>
+    {
+        return detail::make_xfunction<math::sin_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup trigo_functions
+     * @brief Cosine function.
+     *
+     * Returns an \ref xfunction for the element-wise cosine
+     * of \em e (measured in radians).
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto cos(E&& e) noexcept -> detail::xfunction_type_t<math::cos_fun, E>
+    {
+        return detail::make_xfunction<math::cos_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup trigo_functions
+     * @brief Tangent function.
+     *
+     * Returns an \ref xfunction for the element-wise tangent
+     * of \em e (measured in radians).
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto tan(E&& e) noexcept -> detail::xfunction_type_t<math::tan_fun, E>
+    {
+        return detail::make_xfunction<math::tan_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup trigo_functions
+     * @brief Arcsine function.
+     *
+     * Returns an \ref xfunction for the element-wise arcsine
+     * of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto asin(E&& e) noexcept -> detail::xfunction_type_t<math::asin_fun, E>
+    {
+        return detail::make_xfunction<math::asin_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup trigo_functions
+     * @brief Arccosine function.
+     *
+     * Returns an \ref xfunction for the element-wise arccosine
+     * of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto acos(E&& e) noexcept -> detail::xfunction_type_t<math::acos_fun, E>
+    {
+        return detail::make_xfunction<math::acos_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup trigo_functions
+     * @brief Arctangent function.
+     *
+     * Returns an \ref xfunction for the element-wise arctangent
+     * of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto atan(E&& e) noexcept -> detail::xfunction_type_t<math::atan_fun, E>
+    {
+        return detail::make_xfunction<math::atan_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup trigo_functions
+     * @brief Arctangent function, using signs to determine quadrants.
+     *
+     * Returns an \ref xfunction for the element-wise arctangent
+     * of <em>e1 / e2</em>, using the signs of arguments to determine the
+     * correct quadrant.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     * @note e1 and e2 can't be both scalars.
+     */
+    template <class E1, class E2>
+    inline auto atan2(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<math::atan2_fun, E1, E2>
+    {
+        return detail::make_xfunction<math::atan2_fun>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /************************
+     * hyperbolic functions *
+     ************************/
+
+    /**
+     * @defgroup hyper_functions Hyperbolic functions
+     */
+
+    /**
+     * @ingroup hyper_functions
+     * @brief Hyperbolic sine function.
+     *
+     * Returns an \ref xfunction for the element-wise hyperbolic
+     * sine of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto sinh(E&& e) noexcept -> detail::xfunction_type_t<math::sinh_fun, E>
+    {
+        return detail::make_xfunction<math::sinh_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup hyper_functions
+     * @brief Hyperbolic cosine function.
+     *
+     * Returns an \ref xfunction for the element-wise hyperbolic
+     * cosine of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto cosh(E&& e) noexcept -> detail::xfunction_type_t<math::cosh_fun, E>
+    {
+        return detail::make_xfunction<math::cosh_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup hyper_functions
+     * @brief Hyperbolic tangent function.
+     *
+     * Returns an \ref xfunction for the element-wise hyperbolic
+     * tangent of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto tanh(E&& e) noexcept -> detail::xfunction_type_t<math::tanh_fun, E>
+    {
+        return detail::make_xfunction<math::tanh_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup hyper_functions
+     * @brief Inverse hyperbolic sine function.
+     *
+     * Returns an \ref xfunction for the element-wise inverse hyperbolic
+     * sine of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto asinh(E&& e) noexcept -> detail::xfunction_type_t<math::asinh_fun, E>
+    {
+        return detail::make_xfunction<math::asinh_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup hyper_functions
+     * @brief Inverse hyperbolic cosine function.
+     *
+     * Returns an \ref xfunction for the element-wise inverse hyperbolic
+     * cosine of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto acosh(E&& e) noexcept -> detail::xfunction_type_t<math::acosh_fun, E>
+    {
+        return detail::make_xfunction<math::acosh_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup hyper_functions
+     * @brief Inverse hyperbolic tangent function.
+     *
+     * Returns an \ref xfunction for the element-wise inverse hyperbolic
+     * tangent of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto atanh(E&& e) noexcept -> detail::xfunction_type_t<math::atanh_fun, E>
+    {
+        return detail::make_xfunction<math::atanh_fun>(std::forward<E>(e));
+    }
+
+    /*****************************
+     * error and gamma functions *
+     *****************************/
+
+    /**
+     * @defgroup err_functions Error and gamma functions
+     */
+
+    /**
+     * @ingroup err_functions
+     * @brief Error function.
+     *
+     * Returns an \ref xfunction for the element-wise error function
+     * of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto erf(E&& e) noexcept -> detail::xfunction_type_t<math::erf_fun, E>
+    {
+        return detail::make_xfunction<math::erf_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup err_functions
+     * @brief Complementary error function.
+     *
+     * Returns an \ref xfunction for the element-wise complementary
+     * error function of \em e, without loss of precision for large argument.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto erfc(E&& e) noexcept -> detail::xfunction_type_t<math::erfc_fun, E>
+    {
+        return detail::make_xfunction<math::erfc_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup err_functions
+     * @brief Gamma function.
+     *
+     * Returns an \ref xfunction for the element-wise gamma function
+     * of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto tgamma(E&& e) noexcept -> detail::xfunction_type_t<math::tgamma_fun, E>
+    {
+        return detail::make_xfunction<math::tgamma_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup err_functions
+     * @brief Natural logarithm of the gamma function.
+     *
+     * Returns an \ref xfunction for the element-wise logarithm of
+     * the absolute value of the gamma function of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto lgamma(E&& e) noexcept -> detail::xfunction_type_t<math::lgamma_fun, E>
+    {
+        return detail::make_xfunction<math::lgamma_fun>(std::forward<E>(e));
+    }
+
+    /*********************************************
+     * nearest integer floating point operations *
+     *********************************************/
+
+    /**
+     * @defgroup nearint_functions Nearest integer floating point operations
+     */
+
+    /**
+     * @ingroup nearint_functions
+     * @brief ceil function.
+     *
+     * Returns an \ref xfunction for the element-wise smallest integer value
+     * not less than \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto ceil(E&& e) noexcept -> detail::xfunction_type_t<math::ceil_fun, E>
+    {
+        return detail::make_xfunction<math::ceil_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup nearint_functions
+     * @brief floor function.
+     *
+     * Returns an \ref xfunction for the element-wise largest integer value
+     * not greater than \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto floor(E&& e) noexcept -> detail::xfunction_type_t<math::floor_fun, E>
+    {
+        return detail::make_xfunction<math::floor_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup nearint_functions
+     * @brief trunc function.
+     *
+     * Returns an \ref xfunction for the element-wise nearest integer not greater
+     * in magnitude than \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto trunc(E&& e) noexcept -> detail::xfunction_type_t<math::trunc_fun, E>
+    {
+        return detail::make_xfunction<math::trunc_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup nearint_functions
+     * @brief round function.
+     *
+     * Returns an \ref xfunction for the element-wise nearest integer value
+     * to \em e, rounding halfway cases away from zero, regardless of the
+     * current rounding mode.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto round(E&& e) noexcept -> detail::xfunction_type_t<math::round_fun, E>
+    {
+        return detail::make_xfunction<math::round_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup nearint_functions
+     * @brief nearbyint function.
+     *
+     * Returns an \ref xfunction for the element-wise rounding of \em e to integer
+     * values in floating point format, using the current rounding mode. nearbyint
+     * never raises FE_INEXACT error.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto nearbyint(E&& e) noexcept -> detail::xfunction_type_t<math::nearbyint_fun, E>
+    {
+        return detail::make_xfunction<math::nearbyint_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup nearint_functions
+     * @brief rint function.
+     *
+     * Returns an \ref xfunction for the element-wise rounding of \em e to integer
+     * values in floating point format, using the current rounding mode. Contrary
+     * to nearbyint, rint may raise FE_INEXACT error.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto rint(E&& e) noexcept -> detail::xfunction_type_t<math::rint_fun, E>
+    {
+        return detail::make_xfunction<math::rint_fun>(std::forward<E>(e));
+    }
+
+    /****************************
+     * classification functions *
+     ****************************/
+
+    /**
+     * @defgroup classif_functions Classification functions
+     */
+
+    /**
+     * @ingroup classif_functions
+     * @brief finite value check
+     *
+     * Returns an \ref xfunction for the element-wise finite value check
+     * of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto isfinite(E&& e) noexcept -> detail::xfunction_type_t<math::isfinite_fun, E>
+    {
+        return detail::make_xfunction<math::isfinite_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup classif_functions
+     * @brief infinity check
+     *
+     * Returns an \ref xfunction for the element-wise infinity check
+     * of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto isinf(E&& e) noexcept -> detail::xfunction_type_t<math::isinf_fun, E>
+    {
+        return detail::make_xfunction<math::isinf_fun>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup classif_functions
+     * @brief NaN check
+     *
+     * Returns an \ref xfunction for the element-wise NaN check
+     * of \em e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto isnan(E&& e) noexcept -> detail::xfunction_type_t<math::isnan_fun, E>
+    {
+        return detail::make_xfunction<math::isnan_fun>(std::forward<E>(e));
+    }
+
    namespace detail
    {
        // Expands a tuple of stored constructor arguments into the functor
        // type (helper for the stateful-functor make_xfunction below).
        template <class FUNCTOR, class T, std::size_t... Is>
        inline auto get_functor(T&& args, std::index_sequence<Is...>)
        {
            return FUNCTOR(std::get<Is>(args)...);
        }

        // make_xfunction overload for functors carrying state: the functor F
        // is first constructed from f_args, then bound to the expression
        // operands e... in the resulting xfunction expression.
        template <class F, class... A, class... E>
        inline auto make_xfunction(std::tuple<A...>&& f_args, E&&... e) noexcept
        {
            using functor_type = F;
            using expression_tag = xexpression_tag_t<E...>;
            using type = select_xfunction_expression_t<expression_tag, functor_type, const_xclosure_t<E>...>;
            auto functor = get_functor<functor_type>(
                std::forward<std::tuple<A...>>(f_args),
                std::make_index_sequence<sizeof...(A)>{}
            );
            return type(std::move(functor), std::forward<E>(e)...);
        }

        // Element-wise comparison functor backing xt::isclose.
        // NOTE(review): the tolerance test implemented here is
        //   |a - b| <= atol  ||  |a - b| <= rtol * max(|a|, |b|)
        // which is symmetric in a and b, unlike numpy's documented
        // atol + rtol * |b| formula.
        struct isclose
        {
            using result_type = bool;

            isclose(double rtol, double atol, bool equal_nan)
                : m_rtol(rtol)
                , m_atol(atol)
                , m_equal_nan(equal_nan)
            {
            }

            template <class A1, class A2>
            bool operator()(const A1& a, const A2& b) const
            {
                // Promote both operands to at least double before taking the
                // difference, so mixed / integral inputs compare correctly.
                using internal_type = xtl::promote_type_t<A1, A2, double>;
                // Two NaNs are "close" only when equal_nan was requested.
                if (math::isnan(a) && math::isnan(b))
                {
                    return m_equal_nan;
                }
                if (math::isinf(a) && math::isinf(b))
                {
                    // check for both infinity signs equal
                    return a == b;
                }
                auto d = math::abs(internal_type(a) - internal_type(b));
                return d <= m_atol
                       || d <= m_rtol
                                   * double((std::max)(math::abs(internal_type(a)), math::abs(internal_type(b)))
                                   );
            }

        private:

            double m_rtol;
            double m_atol;
            bool m_equal_nan;
        };
    }
+
+    /**
+     * @ingroup classif_functions
+     * @brief Element-wise closeness detection
+     *
+     * Returns an \ref xfunction that evaluates to
+     * true if the elements in ``e1`` and ``e2`` are close to each other
+     * according to parameters ``atol`` and ``rtol``.
+     * The equation is:
+     * ``std::abs(a - b) <= atol || std::abs(a - b) <= rtol * std::max(std::abs(a), std::abs(b))``.
+     * @param e1 input array to compare
+     * @param e2 input array to compare
+     * @param rtol the relative tolerance parameter (default 1e-05)
+     * @param atol the absolute tolerance parameter (default 1e-08)
+     * @param equal_nan if true, isclose returns true if both elements of e1 and e2 are NaN
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto
+    isclose(E1&& e1, E2&& e2, double rtol = 1e-05, double atol = 1e-08, bool equal_nan = false) noexcept
+    {
+        // The tolerances are runtime state, so the stateful make_xfunction
+        // overload taking a tuple of functor-constructor arguments is used.
+        return detail::make_xfunction<detail::isclose>(
+            std::make_tuple(rtol, atol, equal_nan),
+            std::forward<E1>(e1),
+            std::forward<E2>(e2)
+        );
+    }
+
+    /**
+     * @ingroup classif_functions
+     * @brief Check if all elements in \em e1 are close to the
+     * corresponding elements in \em e2.
+     *
+     * Returns true if all elements in ``e1`` and ``e2`` are close to each other
+     * according to parameters ``atol`` and ``rtol``.
+     * Note: NaN elements are never considered close (``isclose`` is invoked
+     * with its default ``equal_nan = false``).
+     * @param e1 input array to compare
+     * @param e2 input array to compare
+     * @param rtol the relative tolerance parameter (default 1e-05)
+     * @param atol the absolute tolerance parameter (default 1e-08)
+     * @return a boolean
+     */
+    template <class E1, class E2>
+    inline auto allclose(E1&& e1, E2&& e2, double rtol = 1e-05, double atol = 1e-08) noexcept
+    {
+        return xt::all(isclose(std::forward<E1>(e1), std::forward<E2>(e2), rtol, atol));
+    }
+
+    /**********************
+     * Reducing functions *
+     **********************/
+
+    /**
+     * @defgroup  red_functions reducing functions
+     */
+
+    /**
+     * @ingroup red_functions
+     * @brief Sum of elements over given axes.
+     *
+     * Returns an \ref xreducer for the sum of elements over given
+     * \em axes.
+     * @param e an \ref xexpression
+     * @param axes the axes along which the sum is performed (optional)
+     * @param es evaluation strategy of the reducer
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type
+     *           of the result, which is the type of `T() + E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xreducer
+     */
+    // NOTE(review): XTENSOR_REDUCER_FUNCTION is defined elsewhere in this file;
+    // it presumably expands to the full overload set (no axes / axes / axes list).
+    XTENSOR_REDUCER_FUNCTION(sum, detail::plus, typename std::decay_t<E>::value_type, 0)
+
+    /**
+     * @ingroup red_functions
+     * @brief Product of elements over given axes.
+     *
+     * Returns an \ref xreducer for the product of elements over given
+     * \em axes.
+     * @param e an \ref xexpression
+     * @param axes the axes along which the product is computed (optional)
+     * @param es evaluation strategy of the reducer
+     * @tparam T the value type used for internal computation. The default is `E::value_type`.
+     *           `T` is also used for determining the value type of the result, which is the type
+     *           of `T() * E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xreducer
+     */
+    XTENSOR_REDUCER_FUNCTION(prod, detail::multiplies, typename std::decay_t<E>::value_type, 1)
+
+    namespace detail
+    {
+        // Divides the reduced sum ``s`` by the number of input elements that
+        // were folded into each result entry (e_size / s.size()). A divisor of
+        // 0 is substituted when the result is empty, to avoid a division trap.
+        template <class T, class S, class ST>
+        inline auto mean_division(S&& s, ST e_size)
+        {
+            using value_type = typename std::conditional_t<std::is_same<T, void>::value, double, T>;
+            // Avoids floating point exception when s.size is 0
+            value_type div = s.size() != ST(0) ? static_cast<value_type>(e_size / s.size()) : value_type(0);
+            return std::move(s) / std::move(div);
+        }
+
+        // Mean over the given axes with a delta-degrees-of-freedom correction:
+        // the effective element count handed to mean_division is size - ddof.
+        template <
+            class T,
+            class E,
+            class X,
+            class D,
+            class EVS,
+            XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::is_integral<D>)>
+        inline auto mean(E&& e, X&& axes, const D& ddof, EVS es)
+        {
+            // sum cannot always be a double. It could be a complex number which cannot operate on
+            // std::plus<double>.
+            using size_type = typename std::decay_t<E>::size_type;
+            const size_type size = e.size();
+            XTENSOR_ASSERT(static_cast<size_type>(ddof) <= size);
+            auto s = sum<T>(std::forward<E>(e), std::forward<X>(axes), es);
+            return mean_division<T>(std::move(s), size - static_cast<size_type>(ddof));
+        }
+
+        // Same as above for axes given as a built-in array (brace list).
+        template <class T, class E, class I, std::size_t N, class D, class EVS>
+        inline auto mean(E&& e, const I (&axes)[N], const D& ddof, EVS es)
+        {
+            using size_type = typename std::decay_t<E>::size_type;
+            const size_type size = e.size();
+            XTENSOR_ASSERT(static_cast<size_type>(ddof) <= size);
+            auto s = sum<T>(std::forward<E>(e), axes, es);
+            return mean_division<T>(std::move(s), size - static_cast<size_type>(ddof));
+        }
+
+        // Mean over the whole (flattened) expression with ddof correction.
+        template <class T, class E, class D, class EVS, XTL_REQUIRES(is_reducer_options<EVS>, xtl::is_integral<D>)>
+        inline auto mean_noaxis(E&& e, const D& ddof, EVS es)
+        {
+            using value_type = typename std::conditional_t<std::is_same<T, void>::value, double, T>;
+            using size_type = typename std::decay_t<E>::size_type;
+            const size_type size = e.size();
+            XTENSOR_ASSERT(static_cast<size_type>(ddof) <= size);
+            auto s = sum<T>(std::forward<E>(e), es);
+            return std::move(s) / static_cast<value_type>((size - static_cast<size_type>(ddof)));
+        }
+    }
+
+    /**
+     * @ingroup red_functions
+     * @brief Mean of elements over given axes.
+     *
+     * Returns an \ref xreducer for the mean of elements over given
+     * \em axes.
+     * @param e an \ref xexpression
+     * @param axes the axes along which the mean is computed (optional)
+     * @param es the evaluation strategy (optional)
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type
+     *           of the result, which is the type of `T() + E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xexpression
+     */
+    template <
+        class T = void,
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>)>
+    inline auto mean(E&& e, X&& axes, EVS es = EVS())
+    {
+        // ddof is fixed to 0: plain mean has no degrees-of-freedom correction.
+        return detail::mean<T>(std::forward<E>(e), std::forward<X>(axes), 0u, es);
+    }
+
+    // Overload without axes: mean over the whole (flattened) expression.
+    template <class T = void, class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto mean(E&& e, EVS es = EVS())
+    {
+        return detail::mean_noaxis<T>(std::forward<E>(e), 0u, es);
+    }
+
+    // Overload for axes given as a brace list, e.g. mean(e, {0, 1}).
+    template <class T = void, class E, class I, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
+    inline auto mean(E&& e, const I (&axes)[N], EVS es = EVS())
+    {
+        return detail::mean<T>(std::forward<E>(e), axes, 0u, es);
+    }
+
+    /**
+     * @ingroup red_functions
+     * @brief Average of elements over given axes using weights.
+     *
+     * Returns an \ref xreducer for the mean of elements over given
+     * \em axes.
+     * @param e an \ref xexpression
+     * @param weights \ref xexpression containing weights associated with the values in \em e
+     * @param axes the axes along which the mean is computed (optional)
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type of the result,
+     *           which is the type of `T() + E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xexpression
+     *
+     * @sa mean
+     */
+    template <
+        class T = void,
+        class E,
+        class W,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(is_reducer_options<EVS>, xtl::negation<xtl::is_integral<X>>)>
+    inline auto average(E&& e, W&& weights, X&& axes, EVS ev = EVS())
+    {
+        xindex_type_t<typename std::decay_t<E>::shape_type> broadcast_shape;
+        xt::resize_container(broadcast_shape, e.dimension());
+        auto ax = normalize_axis(e, axes);
+        if (weights.dimension() == 1)
+        {
+            // 1-D weights: they must match the length of the (single) reduction
+            // axis and are broadcast along all remaining dimensions.
+            if (weights.size() != e.shape()[ax[0]])
+            {
+                XTENSOR_THROW(std::runtime_error, "Weights need to have the same shape as expression at axes.");
+            }
+
+            std::fill(broadcast_shape.begin(), broadcast_shape.end(), std::size_t(1));
+            broadcast_shape[ax[0]] = weights.size();
+        }
+        else
+        {
+            // Multi-dimensional weights must match the expression's shape exactly.
+            if (!same_shape(e.shape(), weights.shape()))
+            {
+                XTENSOR_THROW(
+                    std::runtime_error,
+                    "Weights with dim > 1 need to have the same shape as expression."
+                );
+            }
+
+            std::copy(e.shape().begin(), e.shape().end(), broadcast_shape.begin());
+        }
+
+        constexpr layout_type L = default_assignable_layout(std::decay_t<W>::static_layout);
+        auto weights_view = reshape_view<L>(std::forward<W>(weights), std::move(broadcast_shape));
+        // Normalization factor: the sum of weights over the reduction axes,
+        // evaluated immediately so weights_view can still be reused below.
+        auto scl = sum<T>(weights_view, ax, xt::evaluation_strategy::immediate);
+        return sum<T>(std::forward<E>(e) * std::move(weights_view), std::move(ax), ev) / std::move(scl);
+    }
+
+    /**
+     * @ingroup red_functions
+     * @brief Average of elements along a single (integral) axis using weights.
+     *
+     * Forwards to the axes-list overload of average with a one-element axes
+     * list.
+     * @param e an \ref xexpression
+     * @param weights \ref xexpression containing weights associated with the values in \em e
+     * @param axis the axis along which the average is computed
+     * @tparam T the value type used for internal computation (see average)
+     * @return an \ref xexpression
+     */
+    template <
+        class T = void,
+        class E,
+        class W,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(is_reducer_options<EVS>, xtl::is_integral<X>)>
+    inline auto average(E&& e, W&& weights, X axis, EVS ev = EVS())
+    {
+        // Propagate T explicitly: the unqualified call `average(...)` would
+        // silently fall back to the default computation type (void), dropping
+        // a user-supplied T. The array-axes overload below forwards T the
+        // same way.
+        return average<T>(std::forward<E>(e), std::forward<W>(weights), {axis}, std::forward<EVS>(ev));
+    }
+
+    // Overload for axes given as a built-in array (brace list); normalizes the
+    // axes and forwards to the X&& overload above.
+    template <class T = void, class E, class W, class X, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
+    inline auto average(E&& e, W&& weights, const X (&axes)[N], EVS ev = EVS())
+    {
+        // need to select the X&& overload and forward to different type
+        using ax_t = std::array<std::size_t, N>;
+        return average<T>(std::forward<E>(e), std::forward<W>(weights), xt::forward_normalize<ax_t>(e, axes), ev);
+    }
+
+    // Overload without axes: weighted average over the whole expression.
+    // The weights must have exactly the same shape as the expression.
+    template <class T = void, class E, class W, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto average(E&& e, W&& weights, EVS ev = EVS())
+    {
+        if (weights.dimension() != e.dimension()
+            || !std::equal(weights.shape().begin(), weights.shape().end(), e.shape().begin()))
+        {
+            XTENSOR_THROW(std::runtime_error, "Weights need to have the same shape as expression.");
+        }
+
+        // Normalization factor computed eagerly (note: `weights` is read here
+        // before being forwarded into the lazy product below).
+        auto div = sum<T>(weights, evaluation_strategy::immediate)();
+        auto s = sum<T>(std::forward<E>(e) * std::forward<W>(weights), ev) / std::move(div);
+        return s;
+    }
+
+    // Overload without weights: equivalent to the unweighted mean.
+    template <class T = void, class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto average(E&& e, EVS ev = EVS())
+    {
+        return mean<T>(e, ev);
+    }
+
+    namespace detail
+    {
+        // shared_forward: when E is an lvalue reference, the expression is
+        // passed through unchanged (the caller keeps ownership) ...
+        template <typename E>
+        std::enable_if_t<std::is_lvalue_reference<E>::value, E> shared_forward(E e) noexcept
+        {
+            return e;
+        }
+
+        // ... otherwise the temporary is moved into an xshared_expression so
+        // it can be referenced several times (e.g. by both the mean and the
+        // squared-deviation terms of variance) with shared ownership.
+        template <typename E>
+        std::enable_if_t<!std::is_lvalue_reference<E>::value, xshared_expression<E>> shared_forward(E e) noexcept
+        {
+            return make_xshared(std::move(e));
+        }
+    }
+
+    // Variance of the flattened expression with a delta-degrees-of-freedom
+    // correction: mean of squared deviations from the (cached) global mean,
+    // divided by N - ddof.
+    template <
+        class T = void,
+        class E,
+        class D,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(is_reducer_options<EVS>, xtl::is_integral<D>)>
+    inline auto variance(E&& e, const D& ddof, EVS es = EVS())
+    {
+        // Evaluate the mean once up front so it is not recomputed per element.
+        auto cached_mean = mean<T>(e, es)();
+        return detail::mean_noaxis<T>(square(std::forward<E>(e) - std::move(cached_mean)), ddof, es);
+    }
+
+    // Variance of the flattened expression (ddof = 0).
+    template <class T = void, class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto variance(E&& e, EVS es = EVS())
+    {
+        return variance<T>(std::forward<E>(e), 0u, es);
+    }
+
+    // Standard deviation of the flattened expression: sqrt of the variance.
+    template <class T = void, class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto stddev(E&& e, EVS es = EVS())
+    {
+        return sqrt(variance<T>(std::forward<E>(e), es));
+    }
+
+    /**
+     * @ingroup red_functions
+     * @brief Compute the variance along the specified axes
+     *
+     * Returns the variance of the array elements, a measure of the spread of a
+     * distribution. The variance is computed for the flattened array by default,
+     * otherwise over the specified axes.
+     *
+     * Note: this function is not yet specialized for complex numbers.
+     *
+     * @param e an \ref xexpression
+     * @param axes the axes along which the variance is computed (optional)
+     * @param ddof delta degrees of freedom (optional).
+     *             The divisor used in calculations is N - ddof, where N represents the number of
+     *             elements. By default ddof is zero.
+     * @param es evaluation strategy to use (lazy (default), or immediate)
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type of the result,
+     *           which is the type of `T() + E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xexpression
+     *
+     * @sa stddev, mean
+     */
+    template <
+        class T = void,
+        class E,
+        class X,
+        class D,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::is_integral<D>)>
+    inline auto variance(E&& e, X&& axes, const D& ddof, EVS es = EVS())
+    {
+        // Share the expression so it can feed both the inner mean and the
+        // squared-deviation reduction without being evaluated twice.
+        decltype(auto) sc = detail::shared_forward<E>(e);
+        // note: forcing copy of first axes argument -- is there a better solution?
+        auto axes_copy = axes;
+        // always eval to prevent repeated evaluations in the next calls
+        auto inner_mean = eval(mean<T>(sc, std::move(axes_copy), evaluation_strategy::immediate));
+
+        // fake keep_dims = 1
+        // Since the inner_shape might have a reference semantic (e.g. xbuffer_adaptor in bindings)
+        // We need to map it to another type before modifying it.
+        // We pragmatically abuse `get_strides_t`
+        using tmp_shape_t = get_strides_t<typename std::decay_t<E>::shape_type>;
+        tmp_shape_t keep_dim_shape = xtl::forward_sequence<tmp_shape_t, decltype(e.shape())>(e.shape());
+        for (const auto& el : axes)
+        {
+            keep_dim_shape[el] = 1u;
+        }
+
+        // Reshape the mean so it broadcasts against the original expression.
+        auto mrv = reshape_view<XTENSOR_DEFAULT_LAYOUT>(std::move(inner_mean), std::move(keep_dim_shape));
+        return detail::mean<T>(square(sc - std::move(mrv)), std::forward<X>(axes), ddof, es);
+    }
+
+    // Variance along the specified axes with ddof = 0.
+    template <
+        class T = void,
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::negation<xtl::is_integral<std::decay_t<X>>>, is_reducer_options<EVS>)>
+    inline auto variance(E&& e, X&& axes, EVS es = EVS())
+    {
+        return variance<T>(std::forward<E>(e), std::forward<X>(axes), 0u, es);
+    }
+
+    /**
+     * @ingroup red_functions
+     * @brief Compute the standard deviation along the specified axis.
+     *
+     * Returns the standard deviation, a measure of the spread of a distribution,
+     * of the array elements. The standard deviation is computed for the flattened
+     * array by default, otherwise over the specified axis.
+     *
+     * Note: this function is not yet specialized for complex numbers.
+     *
+     * @param e an \ref xexpression
+     * @param axes the axes along which the standard deviation is computed (optional)
+     * @param es evaluation strategy to use (lazy (default), or immediate)
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type of the result,
+     *           which is the type of `T() + E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xexpression
+     *
+     * @sa variance, mean
+     */
+    template <
+        class T = void,
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>)>
+    inline auto stddev(E&& e, X&& axes, EVS es = EVS())
+    {
+        return sqrt(variance<T>(std::forward<E>(e), std::forward<X>(axes), es));
+    }
+
+    // Overload for axes given as a brace list; converts them to std::array.
+    template <class T = void, class E, class A, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
+    inline auto stddev(E&& e, const A (&axes)[N], EVS es = EVS())
+    {
+        return stddev<T>(
+            std::forward<E>(e),
+            xtl::forward_sequence<std::array<std::size_t, N>, decltype(axes)>(axes),
+            es
+        );
+    }
+
+    // Variance overload for axes given as a brace list (ddof = 0).
+    template <
+        class T = void,
+        class E,
+        class A,
+        std::size_t N,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto variance(E&& e, const A (&axes)[N], EVS es = EVS())
+    {
+        return variance<T>(
+            std::forward<E>(e),
+            xtl::forward_sequence<std::array<std::size_t, N>, decltype(axes)>(axes),
+            es
+        );
+    }
+
+    // Variance overload for axes given as a brace list with explicit ddof.
+    template <class T = void, class E, class A, std::size_t N, class D, class EVS = DEFAULT_STRATEGY_REDUCERS>
+    inline auto variance(E&& e, const A (&axes)[N], const D& ddof, EVS es = EVS())
+    {
+        return variance<T>(
+            std::forward<E>(e),
+            xtl::forward_sequence<std::array<std::size_t, N>, decltype(axes)>(axes),
+            ddof,
+            es
+        );
+    }
+
+    /**
+     * @ingroup red_functions
+     * @brief Minimum and maximum among the elements of an array or expression.
+     *
+     * Returns an \ref xreducer for the minimum and maximum of an expression's elements.
+     * @param e an \ref xexpression
+     * @param es evaluation strategy to use (lazy (default), or immediate)
+     * @return an \ref xexpression of type ``std::array<value_type, 2>``, whose first
+     *         and second element represent the minimum and maximum respectively
+     */
+    template <class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto minmax(E&& e, EVS es = EVS())
+    {
+        using std::max;
+        using std::min;
+        using value_type = typename std::decay_t<E>::value_type;
+        // The accumulator carries both bounds: r[0] = running min, r[1] = running max.
+        using result_type = std::array<value_type, 2>;
+        using init_value_fct = xt::const_value<result_type>;
+
+        // Fold one element into the accumulator.
+        auto reduce_func = [](auto r, const auto& v)
+        {
+            r[0] = (min) (r[0], v);
+            r[1] = (max) (r[1], v);
+            return r;
+        };
+
+        // Neutral element: {highest, lowest} so any real value replaces it.
+        auto init_func = init_value_fct(
+            result_type{std::numeric_limits<value_type>::max(), std::numeric_limits<value_type>::lowest()}
+        );
+
+        // Combine two partial accumulators (e.g. from separate chunks).
+        auto merge_func = [](auto r, const auto& s)
+        {
+            r[0] = (min) (r[0], s[0]);
+            r[1] = (max) (r[1], s[1]);
+            return r;
+        };
+        // Reduce over every axis so a single {min, max} pair remains.
+        return xt::reduce(
+            make_xreducer_functor(std::move(reduce_func), std::move(init_func), std::move(merge_func)),
+            std::forward<E>(e),
+            arange(e.dimension()),
+            es
+        );
+    }
+
+    /**
+     * @defgroup acc_functions accumulating functions
+     */
+
+    /**
+     * @ingroup acc_functions
+     * @brief Cumulative sum.
+     *
+     * Returns the accumulated sum for the elements over given
+     * \em axis (or flattened).
+     * @param e an \ref xexpression
+     * @param axis the axes along which the cumulative sum is computed (optional)
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type of the result,
+     *           which is the type of `T() + E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xarray<T>
+     */
+    template <class T = void, class E>
+    inline auto cumsum(E&& e, std::ptrdiff_t axis)
+    {
+        // T = void selects the expression's own value type for accumulation.
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, typename std::decay_t<E>::value_type, T>;
+        return accumulate(
+            make_xaccumulator_functor(detail::plus(), detail::accumulator_identity<init_value_type>()),
+            std::forward<E>(e),
+            axis
+        );
+    }
+
+    // Overload without axis: cumulative sum over the flattened expression.
+    template <class T = void, class E>
+    inline auto cumsum(E&& e)
+    {
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, typename std::decay_t<E>::value_type, T>;
+        return accumulate(
+            make_xaccumulator_functor(detail::plus(), detail::accumulator_identity<init_value_type>()),
+            std::forward<E>(e)
+        );
+    }
+
+    /**
+     * @ingroup acc_functions
+     * @brief Cumulative product.
+     *
+     * Returns the accumulated product for the elements over given
+     * \em axis (or flattened).
+     * @param e an \ref xexpression
+     * @param axis the axes along which the cumulative product is computed (optional)
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type of the result,
+     *           which is the type of `T() * E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xarray<T>
+     */
+    template <class T = void, class E>
+    inline auto cumprod(E&& e, std::ptrdiff_t axis)
+    {
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, typename std::decay_t<E>::value_type, T>;
+        return accumulate(
+            make_xaccumulator_functor(detail::multiplies(), detail::accumulator_identity<init_value_type>()),
+            std::forward<E>(e),
+            axis
+        );
+    }
+
+    // Overload without axis: cumulative product over the flattened expression.
+    template <class T = void, class E>
+    inline auto cumprod(E&& e)
+    {
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, typename std::decay_t<E>::value_type, T>;
+        return accumulate(
+            make_xaccumulator_functor(detail::multiplies(), detail::accumulator_identity<init_value_type>()),
+            std::forward<E>(e)
+        );
+    }
+
+    /*****************
+     * nan functions *
+     *****************/
+
+    namespace detail
+    {
+        // Maps NaN to 0 and +/-inf to the largest/lowest finite value of A;
+        // every finite value is passed through unchanged.
+        struct nan_to_num_functor
+        {
+            template <class A>
+            inline auto operator()(const A& a) const
+            {
+                if (math::isnan(a))
+                {
+                    return A(0);
+                }
+                if (math::isinf(a))
+                {
+                    if (a < 0)
+                    {
+                        return std::numeric_limits<A>::lowest();
+                    }
+                    else
+                    {
+                        return (std::numeric_limits<A>::max)();
+                    }
+                }
+                return a;
+            }
+        };
+
+        // Binary min that ignores NaN operands: a NaN on either side yields
+        // the other operand.
+        struct nan_min
+        {
+            template <class T, class U>
+            constexpr auto operator()(const T lhs, const U rhs) const
+            {
+                // Clunky expression for working with GCC 4.9
+                return math::isnan(lhs)
+                           ? rhs
+                           : (math::isnan(rhs) ? lhs
+                                               : std::common_type_t<T, U>(
+                                                   detail::make_xfunction<math::minimum<void>>(lhs, rhs)
+                                               ));
+            }
+        };
+
+        // Binary max that ignores NaN operands: a NaN on either side yields
+        // the other operand.
+        struct nan_max
+        {
+            template <class T, class U>
+            constexpr auto operator()(const T lhs, const U rhs) const
+            {
+                // Clunky expression for working with GCC 4.9
+                return math::isnan(lhs)
+                           ? rhs
+                           : (math::isnan(rhs) ? lhs
+                                               : std::common_type_t<T, U>(
+                                                   detail::make_xfunction<math::maximum<void>>(lhs, rhs)
+                                               ));
+            }
+        };
+
+        // Addition that treats a NaN right-hand side as 0 (skips it).
+        struct nan_plus
+        {
+            template <class T, class U>
+            constexpr auto operator()(const T lhs, const U rhs) const
+            {
+                return !math::isnan(rhs) ? lhs + rhs : lhs;
+            }
+        };
+
+        // Multiplication that treats a NaN right-hand side as 1 (skips it).
+        struct nan_multiplies
+        {
+            template <class T, class U>
+            constexpr auto operator()(const T lhs, const U rhs) const
+            {
+                return !math::isnan(rhs) ? lhs * rhs : lhs;
+            }
+        };
+
+        // Initializer for nan reducers: replaces a NaN seed value with the
+        // compile-time constant V (0 for sums, 1 for products).
+        template <class T, int V>
+        struct nan_init
+        {
+            using value_type = T;
+            using result_type = T;
+
+            constexpr result_type operator()(const value_type lhs) const
+            {
+                return math::isnan(lhs) ? result_type(V) : lhs;
+            }
+        };
+    }
+
+    /**
+     * @defgroup  nan_functions nan functions
+     */
+
+    /**
+     * @ingroup nan_functions
+     * @brief Convert nan or +/- inf to numbers
+     *
+     * This functions converts NaN to 0, and +inf to the highest, -inf to the lowest
+     * floating point value of the same type.
+     *
+     * @param e input \ref xexpression
+     * @return an \ref xexpression
+     */
+    template <class E>
+    inline auto nan_to_num(E&& e)
+    {
+        return detail::make_xfunction<detail::nan_to_num_functor>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup nan_functions
+     * @brief Minimum element over given axes, ignoring NaNs.
+     *
+     * Returns an \ref xreducer for the minimum of elements over given
+     * @p axes, ignoring NaNs.
+     * @warning Casting the result to an integer type can cause undefined behavior.
+     * @param e an \ref xexpression
+     * @param axes the axes along which the minimum is found (optional)
+     * @param es evaluation strategy of the reducer (optional)
+     * @tparam T the result type. The default is `E::value_type`.
+     * @return an \ref xreducer
+     */
+    XTENSOR_REDUCER_FUNCTION(nanmin, detail::nan_min, typename std::decay_t<E>::value_type, std::nan("0"))
+
+    /**
+     * @ingroup nan_functions
+     * @brief Maximum element along given axes, ignoring NaNs.
+     *
+     * Returns an \ref xreducer for the maximum of elements over given
+     * @p axes, ignoring NaN.
+     * @warning Casting the result to an integer type can cause undefined behavior.
+     * @param e an \ref xexpression
+     * @param axes the axes along which the maximum is found (optional)
+     * @param es evaluation strategy of the reducer (optional)
+     * @tparam T the result type. The default is `E::value_type`.
+     * @return an \ref xreducer
+     */
+    XTENSOR_REDUCER_FUNCTION(nanmax, detail::nan_max, typename std::decay_t<E>::value_type, std::nan("0"))
+
+    /**
+     * @ingroup nan_functions
+     * @brief Sum of elements over given axes, replacing NaN with 0.
+     *
+     * Returns an \ref xreducer for the sum of elements over given
+     * @p axes, ignoring NaN.
+     * @param e an \ref xexpression
+     * @param axes the axes along which the sum is performed (optional)
+     * @param es evaluation strategy of the reducer (optional)
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type
+     *           of the result, which is the type of `T() + E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xreducer
+     */
+    XTENSOR_REDUCER_FUNCTION(nansum, detail::nan_plus, typename std::decay_t<E>::value_type, 0)
+
+    /**
+     * @ingroup nan_functions
+     * @brief Product of elements over given axes, replacing NaN with 1.
+     *
+     * Returns an \ref xreducer for the product of elements over given
+     * @p axes, replacing nan with 1.
+     * @param e an \ref xexpression
+     * @param axes the axes along which the product is computed (optional)
+     * @param es evaluation strategy of the reducer (optional)
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type
+     *           of the result, which is the type of `T() * E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xreducer
+     */
+    XTENSOR_REDUCER_FUNCTION(nanprod, detail::nan_multiplies, typename std::decay_t<E>::value_type, 1)
+
+// Shared body of the count_nonzero overloads below: defines the init functor
+// (start at 0), the reduce functor (+1 for every element != 0) and the merge
+// functor (plain addition of partial counts). Comments must stay outside the
+// macro: a // inside a backslash-continued #define would swallow the
+// continuation.
+#define COUNT_NON_ZEROS_CONTENT                                                             \
+    using value_type = typename std::decay_t<E>::value_type;                                \
+    using result_type = xt::detail::xreducer_size_type_t<value_type>;                       \
+    using init_value_fct = xt::const_value<result_type>;                                    \
+                                                                                            \
+    auto init_fct = init_value_fct(0);                                                      \
+                                                                                            \
+    auto reduce_fct = [](const auto& lhs, const auto& rhs)                                  \
+    {                                                                                       \
+        using value_t = xt::detail::xreducer_temporary_type_t<std::decay_t<decltype(rhs)>>; \
+        using result_t = std::decay_t<decltype(lhs)>;                                       \
+                                                                                            \
+        return (rhs != value_t(0)) ? lhs + result_t(1) : lhs;                               \
+    };                                                                                      \
+    auto merge_func = detail::plus();
+
+    template <class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto count_nonzero(E&& e, EVS es = EVS())
+    {
+        COUNT_NON_ZEROS_CONTENT;
+        return xt::reduce(
+            make_xreducer_functor(std::move(reduce_fct), std::move(init_fct), std::move(merge_func)),
+            std::forward<E>(e),
+            es
+        );
+    }
+
+    template <
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::negation<xtl::is_integral<X>>)>
+    inline auto count_nonzero(E&& e, X&& axes, EVS es = EVS())
+    {
+        COUNT_NON_ZEROS_CONTENT;
+        return xt::reduce(
+            make_xreducer_functor(std::move(reduce_fct), std::move(init_fct), std::move(merge_func)),
+            std::forward<E>(e),
+            std::forward<X>(axes),
+            es
+        );
+    }
+
+    template <
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::is_integral<X>)>
+    inline auto count_nonzero(E&& e, X axis, EVS es = EVS())
+    {
+        return count_nonzero(std::forward<E>(e), {axis}, es);
+    }
+
+    template <class E, class I, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
+    inline auto count_nonzero(E&& e, const I (&axes)[N], EVS es = EVS())
+    {
+        COUNT_NON_ZEROS_CONTENT;
+        return xt::reduce(
+            make_xreducer_functor(std::move(reduce_fct), std::move(init_fct), std::move(merge_func)),
+            std::forward<E>(e),
+            axes,
+            es
+        );
+    }
+
+#undef COUNT_NON_ZEROS_CONTENT
+
+    // count_nonnan: number of non-NaN elements, implemented as count_nonzero
+    // over the boolean expression !isnan(e). Whole-expression overload.
+    template <class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto count_nonnan(E&& e, EVS es = EVS())
+    {
+        return xt::count_nonzero(!xt::isnan(std::forward<E>(e)), es);
+    }
+
+    // count_nonnan along the axes given in a sequence.
+    template <
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::negation<xtl::is_integral<X>>)>
+    inline auto count_nonnan(E&& e, X&& axes, EVS es = EVS())
+    {
+        return xt::count_nonzero(!xt::isnan(std::forward<E>(e)), std::forward<X>(axes), es);
+    }
+
+    // count_nonnan along a single integral axis (wrapped in a braced list).
+    template <
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::is_integral<X>)>
+    inline auto count_nonnan(E&& e, X&& axes, EVS es = EVS())
+    {
+        return xt::count_nonzero(!xt::isnan(std::forward<E>(e)), {axes}, es);
+    }
+
+    // count_nonnan along axes given as a braced list, e.g. {0, 1}.
+    template <class E, class I, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
+    inline auto count_nonnan(E&& e, const I (&axes)[N], EVS es = EVS())
+    {
+        return xt::count_nonzero(!xt::isnan(std::forward<E>(e)), axes, es);
+    }
+
+    /**
+     * @ingroup nan_functions
+     * @brief Cumulative sum, replacing nan with 0.
+     *
+     * Returns an xaccumulator for the sum of elements over given
+     * \em axis, replacing nan with 0.
+     * @param e an \ref xexpression
+     * @param axis the axis along which the elements are accumulated (optional)
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type
+     *           of the result, which is the type of `T() + E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an xaccumulator
+     */
+    template <class T = void, class E>
+    inline auto nancumsum(E&& e, std::ptrdiff_t axis)
+    {
+        // T == void selects the input's value_type for the accumulation.
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, typename std::decay_t<E>::value_type, T>;
+        return accumulate(
+            make_xaccumulator_functor(detail::nan_plus(), detail::nan_init<init_value_type, 0>()),
+            std::forward<E>(e),
+            axis
+        );
+    }
+
+    // Overload without an axis: accumulates over the flattened expression.
+    template <class T = void, class E>
+    inline auto nancumsum(E&& e)
+    {
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, typename std::decay_t<E>::value_type, T>;
+        return accumulate(
+            make_xaccumulator_functor(detail::nan_plus(), detail::nan_init<init_value_type, 0>()),
+            std::forward<E>(e)
+        );
+    }
+
+    /**
+     * @ingroup nan_functions
+     * @brief Cumulative product, replacing nan with 1.
+     *
+     * Returns an xaccumulator for the product of elements over given
+     * \em axis, replacing nan with 1.
+     * @param e an \ref xexpression
+     * @param axis the axis along which the elements are accumulated (optional)
+     * @tparam T the value type used for internal computation. The default is
+     *           `E::value_type`. `T` is also used for determining the value type
+     *           of the result, which is the type of `T() * E::value_type()`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an xaccumulator
+     */
+    template <class T = void, class E>
+    inline auto nancumprod(E&& e, std::ptrdiff_t axis)
+    {
+        // T == void selects the input's value_type for the accumulation.
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, typename std::decay_t<E>::value_type, T>;
+        return accumulate(
+            make_xaccumulator_functor(detail::nan_multiplies(), detail::nan_init<init_value_type, 1>()),
+            std::forward<E>(e),
+            axis
+        );
+    }
+
+    // Overload without an axis: accumulates over the flattened expression.
+    template <class T = void, class E>
+    inline auto nancumprod(E&& e)
+    {
+        using init_value_type = std::conditional_t<std::is_same<T, void>::value, typename std::decay_t<E>::value_type, T>;
+        return accumulate(
+            make_xaccumulator_functor(detail::nan_multiplies(), detail::nan_init<init_value_type, 1>()),
+            std::forward<E>(e)
+        );
+    }
+
+    namespace detail
+    {
+        // In-place n-th discrete difference: repeatedly replaces `ad` with
+        // ad[1:] - ad[:-1] along axis `saxis`. `slice1` preselects [1:] once;
+        // `slice2` is recomputed each pass because the extent shrinks by one.
+        template <class T>
+        struct diff_impl
+        {
+            template <class Arg>
+            inline void operator()(
+                Arg& ad,
+                const std::size_t& n,
+                xstrided_slice_vector& slice1,
+                xstrided_slice_vector& slice2,
+                std::size_t saxis
+            )
+            {
+                for (std::size_t i = 0; i < n; ++i)
+                {
+                    slice2[saxis] = range(xnone(), ad.shape()[saxis] - 1);
+                    ad = strided_view(ad, slice1) - strided_view(ad, slice2);
+                }
+            }
+        };
+
+        // Boolean inputs: subtraction is not meaningful, so each pass uses
+        // element-wise inequality (XOR) of neighbours instead.
+        template <>
+        struct diff_impl<bool>
+        {
+            template <class Arg>
+            inline void operator()(
+                Arg& ad,
+                const std::size_t& n,
+                xstrided_slice_vector& slice1,
+                xstrided_slice_vector& slice2,
+                std::size_t saxis
+            )
+            {
+                for (std::size_t i = 0; i < n; ++i)
+                {
+                    slice2[saxis] = range(xnone(), ad.shape()[saxis] - 1);
+                    ad = not_equal(strided_view(ad, slice1), strided_view(ad, slice2));
+                }
+            }
+        };
+    }
+
+    /**
+     * @ingroup nan_functions
+     * @brief Mean of elements over given axes, excluding NaNs.
+     *
+     * Returns an \ref xreducer for the mean of elements over given
+     * \em axes, excluding NaNs.
+     * This is not the same as counting NaNs as zero, since excluding NaNs changes the number
+     * of elements considered in the statistic.
+     * @param e an \ref xexpression
+     * @param axes the axes along which the mean is computed (optional)
+     * @param es the evaluation strategy (optional)
+     * @tparam T the result type. The default is `E::value_type`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xexpression
+     */
+    template <
+        class T = void,
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>)>
+    inline auto nanmean(E&& e, X&& axes, EVS es = EVS())
+    {
+        // shared_forward lets both the nansum and the count reference `e` safely.
+        decltype(auto) sc = detail::shared_forward<E>(e);
+        // note: forcing copy of first axes argument -- is there a better solution?
+        auto axes_copy = axes;
+        // With T == void the statistic defaults to double.
+        using value_type = typename std::conditional_t<std::is_same<T, void>::value, double, T>;
+        using sum_type = typename std::conditional_t<
+            std::is_same<T, void>::value,
+            typename std::common_type_t<typename std::decay_t<E>::value_type, value_type>,
+            T>;
+        // sum cannot always be a double. It could be a complex number which cannot operate on
+        // std::plus<double>.
+        return nansum<sum_type>(sc, std::forward<X>(axes), es)
+               / xt::cast<value_type>(count_nonnan(sc, std::move(axes_copy), es));
+    }
+
+    // Overload without axes: mean over the whole (flattened) expression.
+    template <class T = void, class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto nanmean(E&& e, EVS es = EVS())
+    {
+        decltype(auto) sc = detail::shared_forward<E>(e);
+        using value_type = typename std::conditional_t<std::is_same<T, void>::value, double, T>;
+        using sum_type = typename std::conditional_t<
+            std::is_same<T, void>::value,
+            typename std::common_type_t<typename std::decay_t<E>::value_type, value_type>,
+            T>;
+        return nansum<sum_type>(sc, es) / xt::cast<value_type>(count_nonnan(sc, es));
+    }
+
+    // Overload taking a braced axes list, e.g. nanmean(e, {0, 1}).
+    template <class T = void, class E, class I, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
+    inline auto nanmean(E&& e, const I (&axes)[N], EVS es = EVS())
+    {
+        return nanmean<T>(
+            std::forward<E>(e),
+            xtl::forward_sequence<std::array<std::size_t, N>, decltype(axes)>(axes),
+            es
+        );
+    }
+
+    // Variance over all elements, ignoring NaNs: nanmean((x - nanmean(x))^2).
+    template <class T = void, class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto nanvar(E&& e, EVS es = EVS())
+    {
+        decltype(auto) sc = detail::shared_forward<E>(e);
+        return nanmean<T>(square(sc - nanmean<T>(sc)), es);
+    }
+
+    // Standard deviation over all elements, ignoring NaNs: sqrt(nanvar(x)).
+    template <class T = void, class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_reducer_options<EVS>)>
+    inline auto nanstd(E&& e, EVS es = EVS())
+    {
+        return sqrt(nanvar<T>(std::forward<E>(e), es));
+    }
+
+    /**
+     * @ingroup nan_functions
+     * @brief Compute the variance along the specified axes, excluding NaNs
+     *
+     * Returns the variance of the array elements, a measure of the spread of a
+     * distribution. The variance is computed for the flattened array by default,
+     * otherwise over the specified axes.
+     * Excluding NaNs changes the number of elements considered in the statistic.
+     *
+     * Note: this function is not yet specialized for complex numbers.
+     *
+     * @param e an \ref xexpression
+     * @param axes the axes along which the variance is computed (optional)
+     * @param es evaluation strategy to use (lazy (default), or immediate)
+     * @tparam T the result type. The default is `E::value_type`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xexpression
+     *
+     * @sa nanstd, nanmean
+     */
+    template <
+        class T = void,
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>)>
+    inline auto nanvar(E&& e, X&& axes, EVS es = EVS())
+    {
+        decltype(auto) sc = detail::shared_forward<E>(e);
+        // note: forcing copy of first axes argument -- is there a better solution?
+        auto axes_copy = axes;
+        using result_type = typename std::conditional_t<std::is_same<T, void>::value, double, T>;
+        auto inner_mean = nanmean<result_type>(sc, std::move(axes_copy));
+
+        // fake keep_dims = 1
+        // Since the inner_shape might have a reference semantic (e.g. xbuffer_adaptor in bindings)
+        // We need to map it to another type before modifying it.
+        // We pragmatically abuse `get_strides_t`
+        using tmp_shape_t = get_strides_t<typename std::decay_t<E>::shape_type>;
+        tmp_shape_t keep_dim_shape = xtl::forward_sequence<tmp_shape_t, decltype(e.shape())>(e.shape());
+        for (const auto& el : axes)
+        {
+            keep_dim_shape[el] = 1;
+        }
+        auto mrv = reshape_view<XTENSOR_DEFAULT_LAYOUT>(std::move(inner_mean), std::move(keep_dim_shape));
+        // var = E[(x - mean)^2], with NaNs excluded from both means; the
+        // reshaped mean broadcasts back against the original shape.
+        return nanmean<result_type>(square(cast<result_type>(sc) - std::move(mrv)), std::forward<X>(axes), es);
+    }
+
+    /**
+     * @ingroup nan_functions
+     * @brief Compute the standard deviation along the specified axis, excluding nans.
+     *
+     * Returns the standard deviation, a measure of the spread of a distribution,
+     * of the array elements. The standard deviation is computed for the flattened
+     * array by default, otherwise over the specified axis.
+     * Excluding NaNs changes the number of elements considered in the statistic.
+     *
+     * Note: this function is not yet specialized for complex numbers.
+     *
+     * @param e an \ref xexpression
+     * @param axes the axes along which the standard deviation is computed (optional)
+     * @param es evaluation strategy to use (lazy (default), or immediate)
+     * @tparam T the result type. The default is `E::value_type`.
+     *           You can pass `big_promote_value_type_t<E>` to avoid overflow in computation.
+     * @return an \ref xexpression
+     *
+     * @sa nanvar, nanmean
+     */
+    template <
+        class T = void,
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>)>
+    inline auto nanstd(E&& e, X&& axes, EVS es = EVS())
+    {
+        return sqrt(nanvar<T>(std::forward<E>(e), std::forward<X>(axes), es));
+    }
+
+    // Overload taking a braced axes list, e.g. nanstd(e, {0, 1}).
+    template <class T = void, class E, class A, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
+    inline auto nanstd(E&& e, const A (&axes)[N], EVS es = EVS())
+    {
+        return nanstd<T>(
+            std::forward<E>(e),
+            xtl::forward_sequence<std::array<std::size_t, N>, decltype(axes)>(axes),
+            es
+        );
+    }
+
+    // Overload taking a braced axes list, e.g. nanvar(e, {0, 1}).
+    template <class T = void, class E, class A, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
+    inline auto nanvar(E&& e, const A (&axes)[N], EVS es = EVS())
+    {
+        return nanvar<T>(
+            std::forward<E>(e),
+            xtl::forward_sequence<std::array<std::size_t, N>, decltype(axes)>(axes),
+            es
+        );
+    }
+
+    /**
+     * @ingroup red_functions
+     * @brief Calculate the n-th discrete difference along the given axis.
+     *
+     * Calculate the n-th discrete difference along the given axis. This function is not lazy (might change in
+     * the future).
+     * @param a an \ref xexpression
+     * @param n The number of times values are differenced. If zero, the input is returned as-is. (optional)
+     * @param axis The axis along which the difference is taken, default is the last axis.
+     * @return an xarray
+     */
+    template <class T>
+    auto diff(const xexpression<T>& a, std::size_t n = 1, std::ptrdiff_t axis = -1)
+    {
+        // Work on an owning copy; differencing happens in place on `ad`.
+        typename std::decay_t<T>::temporary_type ad = a.derived_cast();
+        std::size_t saxis = normalize_axis(ad.dimension(), axis);
+        // NOTE(review): `n` is compared against the total element count, not the
+        // extent along `saxis` — verify against numpy semantics for multi-d input.
+        if (n <= ad.size())
+        {
+            if (n != std::size_t(0))
+            {
+                xstrided_slice_vector slice1(ad.dimension(), all());
+                xstrided_slice_vector slice2(ad.dimension(), all());
+                slice1[saxis] = range(1, xnone());
+
+                detail::diff_impl<typename T::value_type> impl;
+                impl(ad, n, slice1, slice2, saxis);
+            }
+        }
+        else
+        {
+            // Differencing more times than there are elements yields an empty
+            // result along the chosen axis.
+            auto shape = ad.shape();
+            shape[saxis] = std::size_t(0);
+            ad.resize(shape);
+        }
+        return ad;
+    }
+
+    /**
+     * @ingroup red_functions
+     * @brief Integrate along the given axis using the composite trapezoidal rule.
+     *
+     * Returns definite integral as approximated by trapezoidal rule. This function is not lazy (might change
+     * in the future).
+     * @param y an \ref xexpression
+     * @param dx the spacing between sample points (optional)
+     * @param axis the axis along which to integrate.
+     * @return an xarray
+     */
+    template <class T>
+    auto trapz(const xexpression<T>& y, double dx = 1.0, std::ptrdiff_t axis = -1)
+    {
+        auto& yd = y.derived_cast();
+        std::size_t saxis = normalize_axis(yd.dimension(), axis);
+
+        // slice1 selects y[1:], slice2 selects y[:-1] along the integration axis.
+        xstrided_slice_vector slice1(yd.dimension(), all());
+        xstrided_slice_vector slice2(yd.dimension(), all());
+        slice1[saxis] = range(1, xnone());
+        slice2[saxis] = range(xnone(), yd.shape()[saxis] - 1);
+
+        // Trapezoid areas dx * (y_i + y_{i+1}) / 2, summed along the axis.
+        auto trap = dx * (strided_view(yd, slice1) + strided_view(yd, slice2)) * 0.5;
+
+        return eval(sum(trap, {saxis}));
+    }
+
+    /**
+     * @ingroup red_functions
+     * @brief Integrate along the given axis using the composite trapezoidal rule.
+     *
+     * Returns definite integral as approximated by trapezoidal rule. This function is not lazy (might change
+     * in the future).
+     * @param y an \ref xexpression
+     * @param x an \ref xexpression representing the sample points corresponding to the y values.
+     * @param axis the axis along which to integrate.
+     * @return an xarray
+     */
+    template <class T, class E>
+    auto trapz(const xexpression<T>& y, const xexpression<E>& x, std::ptrdiff_t axis = -1)
+    {
+        auto& yd = y.derived_cast();
+        auto& xd = x.derived_cast();
+        // dx holds the (possibly non-uniform) spacing between sample points.
+        decltype(diff(x)) dx;
+
+        std::size_t saxis = normalize_axis(yd.dimension(), axis);
+
+        if (xd.dimension() == 1)
+        {
+            // 1-D sample points: reshape the spacing so it broadcasts along
+            // the integration axis of y.
+            dx = diff(x);
+            typename std::decay_t<decltype(yd)>::shape_type shape;
+            resize_container(shape, yd.dimension());
+            std::fill(shape.begin(), shape.end(), 1);
+            shape[saxis] = dx.shape()[0];
+            dx.reshape(shape);
+        }
+        else
+        {
+            dx = diff(x, 1, axis);
+        }
+
+        // slice1 selects y[1:], slice2 selects y[:-1] along the integration axis.
+        xstrided_slice_vector slice1(yd.dimension(), all());
+        xstrided_slice_vector slice2(yd.dimension(), all());
+        slice1[saxis] = range(1, xnone());
+        slice2[saxis] = range(xnone(), yd.shape()[saxis] - 1);
+
+        auto trap = dx * (strided_view(yd, slice1) + strided_view(yd, slice2)) * 0.5;
+
+        return eval(sum(trap, {saxis}));
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Returns the one-dimensional piecewise linear interpolant to a function with given discrete data
+     * points (xp, fp), evaluated at x.
+     *
+     * @param x The x-coordinates at which to evaluate the interpolated values (sorted).
+     * @param xp The x-coordinates of the data points (sorted).
+     * @param fp The y-coordinates of the data points, same length as xp.
+     * @param left Value to return for x < xp[0].
+     * @param right Value to return for x > xp[-1]
+     * @return an one-dimensional xarray, same length as x.
+     */
+    template <class E1, class E2, class E3, typename T>
+    inline auto interp(const E1& x, const E2& xp, const E3& fp, T left, T right)
+    {
+        using size_type = common_size_type_t<E1, E2, E3>;
+        using value_type = typename E3::value_type;
+
+        // basic checks
+        // NOTE: fp is assumed to have the same length as xp (not checked here).
+        XTENSOR_ASSERT(xp.dimension() == 1);
+        XTENSOR_ASSERT(std::is_sorted(x.cbegin(), x.cend()));
+        XTENSOR_ASSERT(std::is_sorted(xp.cbegin(), xp.cend()));
+
+        // allocate output
+        auto f = xtensor<value_type, 1>::from_shape(x.shape());
+
+        // counter in "x": from left
+        size_type i = 0;
+
+        // fill f[i] for x[i] <= xp[0]
+        for (; i < x.size(); ++i)
+        {
+            if (x[i] > xp[0])
+            {
+                break;
+            }
+            f[i] = static_cast<value_type>(left);
+        }
+
+        // counter in "x": from right
+        // (index counts one right, to terminate the reverse loop, without risking being negative)
+        size_type imax = x.size();
+
+        // fill f[i] for x[-1] >= xp[-1]
+        for (; imax > 0; --imax)
+        {
+            if (x[imax - 1] < xp[xp.size() - 1])
+            {
+                break;
+            }
+            f[imax - 1] = static_cast<value_type>(right);
+        }
+
+        // catch edge case: all entries are "right"
+        if (imax == 0)
+        {
+            return f;
+        }
+
+        // set "imax" as actual index
+        // (counted one right, see above)
+        --imax;
+
+        // counter in "xp"
+        size_type ip = 1;
+
+        // fill f[i] for the interior
+        for (; i <= imax; ++i)
+        {
+            // - search next value in "xp"
+            // (linear scan; both x and xp are sorted, so ip only moves forward)
+            while (x[i] > xp[ip])
+            {
+                ++ip;
+            }
+            // - distances as doubles
+            double dfp = static_cast<double>(fp[ip] - fp[ip - 1]);
+            double dxp = static_cast<double>(xp[ip] - xp[ip - 1]);
+            double dx = static_cast<double>(x[i] - xp[ip - 1]);
+            // - interpolate
+            f[i] = fp[ip - 1] + static_cast<value_type>(dfp / dxp * dx);
+        }
+
+        return f;
+    }
+
+    namespace detail
+    {
+        // An explicit discontinuity was provided to unwrap(): use it as-is.
+        template <class E1, class E2>
+        auto calculate_discontinuity(E1&& discontinuity, E2&&)
+        {
+            return discontinuity;
+        }
+
+        // No discontinuity given (xnone() placeholder): default to period / 2.
+        template <class E2>
+        auto calculate_discontinuity(xt::placeholders::xtuph, E2&& period)
+        {
+            return 0.5 * period;
+        }
+
+        // Returns (period / 2, whether values exactly at +/- period / 2 are
+        // ambiguous). For integral value types the boundary is ambiguous only
+        // when the period is even.
+        template <class E1, class E2>
+        auto
+        calculate_interval(E2&& period, typename std::enable_if<std::is_integral<E1>::value, E1>::type* = 0)
+        {
+            auto interval_high = 0.5 * period;
+            uint64_t remainder = static_cast<uint64_t>(period) % 2;
+            auto boundary_ambiguous = (remainder == 0);
+            return std::make_tuple(interval_high, boundary_ambiguous);
+        }
+
+        // Floating-point value types: the boundary is always ambiguous.
+        template <class E1, class E2>
+        auto
+        calculate_interval(E2&& period, typename std::enable_if<std::is_floating_point<E1>::value, E1>::type* = 0)
+        {
+            auto interval_high = 0.5 * period;
+            auto boundary_ambiguous = true;
+            return std::make_tuple(interval_high, boundary_ambiguous);
+        }
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Unwrap by taking the complement of large deltas with respect to the period
+     * @details https://numpy.org/doc/stable/reference/generated/numpy.unwrap.html
+     * @param p Input array.
+     * @param discontinuity
+     *     Maximum discontinuity between values, default is `period / 2`.
+     *     Values below `period / 2` are treated as if they were `period / 2`.
+     *     To have an effect different from the default, use `discontinuity > period / 2`.
+     * @param axis Axis along which unwrap will operate, default: the last axis.
+     * @param period Size of the range over which the input wraps. Default: \f$ 2 \pi \f$.
+     * @return a copy of @p p with the unwrapped values; container arguments are
+     *     left unmodified.
+     */
+
+    template <class E1, class E2 = xt::placeholders::xtuph, class E3 = double>
+    inline auto unwrap(
+        E1&& p,
+        E2 discontinuity = xnone(),
+        std::ptrdiff_t axis = -1,
+        E3 period = 2.0 * xt::numeric_constants<double>::PI
+    )
+    {
+        auto discont = detail::calculate_discontinuity(discontinuity, period);
+        using value_type = typename std::decay_t<E1>::value_type;
+        std::size_t saxis = normalize_axis(p.dimension(), axis);
+        // First-order differences along the unwrap axis.
+        auto dd = diff(p, 1, axis);
+        xstrided_slice_vector slice(p.dimension(), all());
+        slice[saxis] = range(1, xnone());
+        auto interval_tuple = detail::calculate_interval<value_type>(period);
+        auto interval_high = std::get<0>(interval_tuple);
+        auto boundary_ambiguous = std::get<1>(interval_tuple);
+        auto interval_low = -interval_high;
+        // Map each difference into [-period/2, period/2).
+        auto ddmod = xt::eval(xt::fmod(xt::fmod(dd - interval_low, period) + period, period) + interval_low);
+        if (boundary_ambiguous)
+        {
+            // for `mask = (abs(dd) == period/2)`, the above line made
+            //`ddmod[mask] == -period/2`. correct these such that
+            //`ddmod[mask] == sign(dd[mask])*period/2`.
+            auto boolmap = xt::equal(ddmod, interval_low) && (xt::greater(dd, 0.0));
+            ddmod = xt::where(boolmap, interval_high, ddmod);
+        }
+        auto ph_correct = xt::eval(ddmod - dd);
+        ph_correct = xt::where(xt::abs(dd) < discont, 0.0, ph_correct);
+        // Work on a value copy. With an lvalue argument, E1 deduces to a
+        // reference type, so the previous `E1 up(p);` declared a *reference*
+        // and silently modified the caller's array in place. std::decay_t
+        // makes `up` a copy for container arguments in all cases.
+        std::decay_t<E1> up(p);
+        strided_view(up, slice) = strided_view(p, slice)
+                                  + xt::cumsum(ph_correct, static_cast<std::ptrdiff_t>(saxis));
+        return up;
+    }
+
+    /**
+     * @ingroup basic_functions
+     * @brief Returns the one-dimensional piecewise linear interpolant to a function with given discrete data
+     * points (xp, fp), evaluated at x.
+     *
+     * Out-of-range points are clamped to the boundary samples of @p fp.
+     *
+     * @param x The x-coordinates at which to evaluate the interpolated values (sorted).
+     * @param xp The x-coordinates of the data points (sorted).
+     * @param fp The y-coordinates of the data points, same length as xp.
+     * @return an one-dimensional xarray, same length as x.
+     */
+    template <class E1, class E2, class E3>
+    inline auto interp(const E1& x, const E2& xp, const E3& fp)
+    {
+        // Extrapolate with the first/last sample of fp on either side.
+        const auto left_value = fp[0];
+        const auto right_value = fp[fp.size() - 1];
+        return interp(x, xp, fp, left_value, right_value);
+    }
+
+    /**
+     * @brief Returns the covariance matrix
+     *
+     * @param x one or two dimensional array
+     * @param y optional one-dimensional array to build covariance to x
+     */
+    template <class E1>
+    inline auto cov(const E1& x, const E1& y = E1())
+    {
+        using value_type = typename E1::value_type;
+
+        if (y.dimension() == 0)
+        {
+            auto s = x.shape();
+            using size_type = std::decay_t<decltype(s[0])>;
+            if (x.dimension() == 1)
+            {
+                // Single 1-D sample: 1x1 matrix holding the sample variance
+                // (normalized by N - 1).
+                auto covar = eval(zeros<value_type>({1, 1}));
+                auto x_norm = x - eval(mean(x));
+                covar(0, 0) = std::inner_product(x_norm.begin(), x_norm.end(), x_norm.begin(), 0.0)
+                              / value_type(s[0] - 1);
+                return covar;
+            }
+
+            XTENSOR_ASSERT(x.dimension() == 2);
+
+            // 2-D input: rows are variables, columns are observations.
+            auto covar = eval(zeros<value_type>({s[0], s[0]}));
+            auto m = eval(mean(x, {1}));
+            m.reshape({m.shape()[0], 1});
+            auto x_norm = x - m;
+            // Fill the lower triangle (including the diagonal) only.
+            for (size_type i = 0; i < s[0]; i++)
+            {
+                auto xi = strided_view(x_norm, {range(i, i + 1), all()});
+                for (size_type j = i; j < s[0]; j++)
+                {
+                    auto xj = strided_view(x_norm, {range(j, j + 1), all()});
+                    covar(j, i) = std::inner_product(xi.begin(), xi.end(), xj.begin(), 0.0)
+                                  / value_type(s[1] - 1);
+                }
+            }
+            // Mirror to the upper triangle; the diagonal was added twice, so
+            // subtract it once.
+            return eval(covar + transpose(covar) - diag(diagonal(covar)));
+        }
+        else
+        {
+            // Two 1-D inputs: stack them as two rows and recurse.
+            return cov(eval(stack(xtuple(x, y))));
+        }
+    }
+
+    /*
+     * convolution mode placeholders for selecting the algorithm
+     * used in computing a 1D convolution.
+     * Same as NumPy's mode parameter.
+     */
+    namespace convolve_mode
+    {
+        // Tag type: keep only positions where the inputs fully overlap.
+        struct valid
+        {
+        };
+
+        // Tag type: full convolution, including partial-overlap boundaries.
+        struct full
+        {
+        };
+    }
+
+    namespace detail
+    {
+        // "valid" mode: output size is na - nv + 1; only full-overlap positions
+        // are kept. convolve() guarantees e1 is the longer input.
+        template <class E1, class E2>
+        inline auto convolve_impl(E1&& e1, E2&& e2, convolve_mode::valid)
+        {
+            using value_type = typename std::decay<E1>::type::value_type;
+
+            const std::size_t na = e1.size();
+            const std::size_t nv = e2.size();
+            const std::size_t n = na - nv + 1;
+            xt::xtensor<value_type, 1> out = xt::zeros<value_type>({n});
+            for (std::size_t i = 0; i < n; i++)
+            {
+                for (std::size_t j = 0; j < nv; j++)
+                {
+                    // True convolution: slide the reversed kernel e2 over e1.
+                    // (The previous `e1(j) * e2(j + i)` indexed the shorter
+                    // input e2 past its end for i > 0 and never advanced
+                    // over e1; this form matches the full-mode definition
+                    // restricted to full-overlap indices.)
+                    out(i) += e1(i + j) * e2(nv - 1 - j);
+                }
+            }
+            return out;
+        }
+
+        // "full" mode: output size is na + nv - 1; boundaries are implicitly
+        // zero-padded by restricting j to in-range indices of both inputs.
+        template <class E1, class E2>
+        inline auto convolve_impl(E1&& e1, E2&& e2, convolve_mode::full)
+        {
+            using value_type = typename std::decay<E1>::type::value_type;
+
+            const std::size_t na = e1.size();
+            const std::size_t nv = e2.size();
+            const std::size_t n = na + nv - 1;
+            xt::xtensor<value_type, 1> out = xt::zeros<value_type>({n});
+            for (std::size_t i = 0; i < n; i++)
+            {
+                const std::size_t jmn = (i >= nv - 1) ? i - (nv - 1) : 0;
+                const std::size_t jmx = (i < na - 1) ? i : na - 1;
+                for (std::size_t j = jmn; j <= jmx; ++j)
+                {
+                    // out(i) = sum_j e1(j) * e2(i - j), the convolution sum.
+                    out(i) += e1(j) * e2(i - j);
+                }
+            }
+            return out;
+        }
+    }
+
+    /**
+     * @brief computes the 1D convolution between two 1D expressions
+     *
+     * @param a 1D expression
+     * @param v 1D expression
+     * @param mode placeholder Select algorithm #convolve_mode
+     *
+     * @throws std::runtime_error if either argument is not one-dimensional
+     *
+     * @detail the algorithm convolves a with v and will incur a copy overhead
+     *   should v be longer than a.
+     */
+    template <class E1, class E2, class E3>
+    inline auto convolve(E1&& a, E2&& v, E3 mode)
+    {
+        if (a.dimension() != 1 || v.dimension() != 1)
+        {
+            // fixed typo ("dimentions") in the user-facing error message
+            XTENSOR_THROW(std::runtime_error, "Invalid dimensions: convolution arguments must be 1D expressions");
+        }
+
+        XTENSOR_ASSERT(a.size() > 0 && v.size() > 0);
+
+        // Convolution is commutative: swap so the implementation always
+        // receives the longer input first.
+        if (a.size() < v.size())
+        {
+            return detail::convolve_impl(std::forward<E2>(v), std::forward<E1>(a), mode);
+        }
+        else
+        {
+            return detail::convolve_impl(std::forward<E1>(a), std::forward<E2>(v), mode);
+        }
+    }
+}
+
+
+#endif

+ 431 - 0
3rd/numpy/include/xtensor/xmime.hpp

@@ -0,0 +1,431 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_MIME_HPP
+#define XTENSOR_MIME_HPP
+
+#include <cstddef>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <nlohmann/json.hpp>
+
+#include "xio.hpp"
+
+namespace xt
+{
+    // Render a 0-dimensional expression as a single-cell HTML table.
+    // The printer is unused: the scalar value is streamed directly via expr().
+    template <class P, class T>
+    void compute_0d_table(std::stringstream& out, P& /*printer*/, const T& expr)
+    {
+        out << "<table style='border-style:solid;border-width:1px;'><tbody>";
+        out << "<tr><td style='font-family:monospace;'><pre>";
+        out << expr();
+        out << "</pre></td></tr>";
+        out << "</tbody></table>";
+    }
+
+    // Emit one table row for a 1-D expression. The cell text comes from the
+    // printer's internal cursor; row_idx only feeds the hover tooltip.
+    template <class P>
+    void compute_1d_row(std::stringstream& out, P& printer, const std::size_t& row_idx)
+    {
+        out << "<tr><td style='font-family:monospace;' title='" << row_idx << "'><pre>";
+        printer.print_next(out);
+        out << "</pre></td></tr>";
+    }
+
+    // Render a 1-D expression as a one-column HTML table. When edgeitems is
+    // non-zero and the axis is long enough, only the first and last
+    // `edgeitems` rows are emitted around a vertical-ellipsis (U+22EE) row.
+    template <class P, class T>
+    void compute_1d_table(std::stringstream& out, P& printer, const T& expr, const std::size_t& edgeitems)
+    {
+        const auto& dim = expr.shape()[0];
+
+        out << "<table style='border-style:solid;border-width:1px;'><tbody>";
+        if (edgeitems == 0 || 2 * edgeitems >= dim)
+        {
+            for (std::size_t row_idx = 0; row_idx < dim; ++row_idx)
+            {
+                compute_1d_row(out, printer, row_idx);
+            }
+        }
+        else
+        {
+            for (std::size_t row_idx = 0; row_idx < edgeitems; ++row_idx)
+            {
+                compute_1d_row(out, printer, row_idx);
+            }
+            out << "<tr><td><center>\u22ee</center></td></tr>";
+            for (std::size_t row_idx = dim - edgeitems; row_idx < dim; ++row_idx)
+            {
+                compute_1d_row(out, printer, row_idx);
+            }
+        }
+        out << "</tbody></table>";
+    }
+
+    // Emit one cell of a 2-D slice; the tooltip carries the full index,
+    // i.e. the outer-dimension prefix (idx_str) followed by (row, column).
+    template <class P>
+    void compute_2d_element(
+        std::stringstream& out,
+        P& printer,
+        const std::string& idx_str,
+        const std::size_t& row_idx,
+        const std::size_t& column_idx
+    )
+    {
+        out << "<td style='font-family:monospace;' title='(" << idx_str << row_idx << ", " << column_idx
+            << ")'><pre>";
+        printer.print_next(out);
+        out << "</pre></td>";
+    }
+
+    // Emit one row of a 2-D slice, abbreviating the middle columns with a
+    // horizontal ellipsis (U+22EF) when edgeitems truncation applies.
+    template <class P, class T>
+    void compute_2d_row(
+        std::stringstream& out,
+        P& printer,
+        const T& expr,
+        const std::size_t& edgeitems,
+        const std::string& idx_str,
+        const std::size_t& row_idx
+    )
+    {
+        const auto& dim = expr.shape()[expr.dimension() - 1];
+
+        out << "<tr>";
+        if (edgeitems == 0 || 2 * edgeitems >= dim)
+        {
+            for (std::size_t column_idx = 0; column_idx < dim; ++column_idx)
+            {
+                compute_2d_element(out, printer, idx_str, row_idx, column_idx);
+            }
+        }
+        else
+        {
+            for (std::size_t column_idx = 0; column_idx < edgeitems; ++column_idx)
+            {
+                compute_2d_element(out, printer, idx_str, row_idx, column_idx);
+            }
+            out << "<td><center>\u22ef</center></td>";
+            for (std::size_t column_idx = dim - edgeitems; column_idx < dim; ++column_idx)
+            {
+                compute_2d_element(out, printer, idx_str, row_idx, column_idx);
+            }
+        }
+        out << "</tr>";
+    }
+
+    // Render the innermost two dimensions of an expression as an HTML table.
+    // `idx` holds the already-fixed outer indices; it is flattened into a
+    // "i, j, " prefix used by the per-cell tooltips.
+    template <class P, class T, class I>
+    void compute_2d_table(
+        std::stringstream& out,
+        P& printer,
+        const T& expr,
+        const std::size_t& edgeitems,
+        const std::vector<I>& idx
+    )
+    {
+        const auto& dim = expr.shape()[expr.dimension() - 2];
+        const auto& last_dim = expr.shape()[expr.dimension() - 1];
+        std::string idx_str;
+        std::for_each(
+            idx.cbegin(),
+            idx.cend(),
+            [&idx_str](const auto& i)
+            {
+                idx_str += std::to_string(i) + ", ";
+            }
+        );
+
+        // number of cells in the row-ellipsis line: mirror the column layout
+        // (edgeitems + ellipsis + edgeitems), capped at the real column count
+        std::size_t nb_ellipsis = 2 * edgeitems + 1;
+        if (last_dim <= 2 * edgeitems + 1)
+        {
+            nb_ellipsis = last_dim;
+        }
+
+        out << "<table style='border-style:solid;border-width:1px;'><tbody>";
+        if (edgeitems == 0 || 2 * edgeitems >= dim)
+        {
+            for (std::size_t row_idx = 0; row_idx < dim; ++row_idx)
+            {
+                compute_2d_row(out, printer, expr, edgeitems, idx_str, row_idx);
+            }
+        }
+        else
+        {
+            for (std::size_t row_idx = 0; row_idx < edgeitems; ++row_idx)
+            {
+                compute_2d_row(out, printer, expr, edgeitems, idx_str, row_idx);
+            }
+            out << "<tr>";
+            for (std::size_t column_idx = 0; column_idx < nb_ellipsis; ++column_idx)
+            {
+                // diagonal ellipsis (U+22F1) where row and column truncation
+                // intersect, vertical ellipsis (U+22EE) elsewhere
+                if (column_idx == edgeitems && nb_ellipsis != last_dim)
+                {
+                    out << "<td><center>\u22f1</center></td>";
+                }
+                else
+                {
+                    out << "<td><center>\u22ee</center></td>";
+                }
+            }
+            out << "</tr>";
+            for (std::size_t row_idx = dim - edgeitems; row_idx < dim; ++row_idx)
+            {
+                compute_2d_row(out, printer, expr, edgeitems, idx_str, row_idx);
+            }
+        }
+        out << "</tbody></table>";
+    }
+
+    // Wrap one sub-table of an N-D expression in a table row. The call to
+    // compute_nd_table_impl is dependent on the template arguments, so it is
+    // resolved at instantiation time even though its definition follows below.
+    template <class P, class T, class I>
+    void compute_nd_row(
+        std::stringstream& out,
+        P& printer,
+        const T& expr,
+        const std::size_t& edgeitems,
+        const std::vector<I>& idx
+    )
+    {
+        out << "<tr><td>";
+        compute_nd_table_impl(out, printer, expr, edgeitems, idx);
+        out << "</td></tr>";
+    }
+
+    // Recursively render an expression of dimension > 2: fix one more outer
+    // index per level (accumulated in `idx`), nesting one table per level,
+    // until only two dimensions remain and compute_2d_table takes over.
+    template <class P, class T, class I>
+    void compute_nd_table_impl(
+        std::stringstream& out,
+        P& printer,
+        const T& expr,
+        const std::size_t& edgeitems,
+        const std::vector<I>& idx
+    )
+    {
+        const auto& displayed_dimension = idx.size();
+        const auto& expr_dim = expr.dimension();
+        const auto& dim = expr.shape()[displayed_dimension];
+
+        // base case: two trailing dimensions left
+        if (expr_dim - displayed_dimension == 2)
+        {
+            return compute_2d_table(out, printer, expr, edgeitems, idx);
+        }
+
+        // extend the index prefix with a slot for the current dimension
+        std::vector<I> idx2 = idx;
+        idx2.resize(displayed_dimension + 1);
+
+        out << "<table style='border-style:solid;border-width:1px;'>";
+        if (edgeitems == 0 || 2 * edgeitems >= dim)
+        {
+            for (std::size_t i = 0; i < dim; ++i)
+            {
+                idx2[displayed_dimension] = i;
+                compute_nd_row(out, printer, expr, edgeitems, idx2);
+            }
+        }
+        else
+        {
+            for (std::size_t i = 0; i < edgeitems; ++i)
+            {
+                idx2[displayed_dimension] = i;
+                compute_nd_row(out, printer, expr, edgeitems, idx2);
+            }
+            out << "<tr><td><center>\u22ef</center></td></tr>";
+            for (std::size_t i = dim - edgeitems; i < dim; ++i)
+            {
+                idx2[displayed_dimension] = i;
+                compute_nd_row(out, printer, expr, edgeitems, idx2);
+            }
+        }
+        out << "</table>";
+    }
+
+    // Entry point: dispatch on the expression's dimension to the 0-D, 1-D,
+    // or recursive N-D renderer (the latter also handles the 2-D case).
+    template <class P, class T>
+    void compute_nd_table(std::stringstream& out, P& printer, const T& expr, const std::size_t& edgeitems)
+    {
+        if (expr.dimension() == 0)
+        {
+            compute_0d_table(out, printer, expr);
+        }
+        else if (expr.dimension() == 1)
+        {
+            compute_1d_table(out, printer, expr, edgeitems);
+        }
+        else
+        {
+            std::vector<std::size_t> empty_vector;
+            compute_nd_table_impl(out, printer, expr, edgeitems, empty_vector);
+        }
+    }
+
+    // Build the mime bundle for an expression: an HTML table rendering under
+    // the "text/html" key. Shared backend for every mime_bundle_repr overload.
+    template <class E>
+    nlohmann::json mime_bundle_repr_impl(const E& expr)
+    {
+        std::stringstream out;
+
+        // edgeitems == 0 disables truncation; it is only enabled when the
+        // total element count exceeds the configured print threshold
+        std::size_t edgeitems = 0;
+        std::size_t size = compute_size(expr.shape());
+        if (size > static_cast<std::size_t>(print_options::print_options().threshold))
+        {
+            edgeitems = static_cast<std::size_t>(print_options::print_options().edge_items);
+        }
+
+        // -1 means "use the stream default" precision
+        if (print_options::print_options().precision != -1)
+        {
+            out.precision(print_options::print_options().precision);
+        }
+
+        detail::printer<E> printer(out.precision());
+
+        // first pass collects the values to print, then init() prepares the
+        // printer's cursor before the table-building pass consumes them
+        xstrided_slice_vector slice_vector;
+        detail::recurser_run(printer, expr, slice_vector, edgeitems);
+        printer.init();
+
+        compute_nd_table(out, printer, expr, edgeitems);
+
+        auto bundle = nlohmann::json::object();
+        bundle["text/html"] = out.str();
+        return bundle;
+    }
+
+    // Forward declaration + mime_bundle_repr overload, one pair per xtensor
+    // expression type, all delegating to mime_bundle_repr_impl. The forward
+    // declarations keep this header free of the corresponding includes.
+    template <class F, class CT>
+    class xfunctor_view;
+
+    template <class F, class CT>
+    nlohmann::json mime_bundle_repr(const xfunctor_view<F, CT>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class F, class... CT>
+    class xfunction;
+
+    template <class F, class... CT>
+    nlohmann::json mime_bundle_repr(const xfunction<F, CT...>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class EC, layout_type L, class SC, class Tag>
+    class xarray_container;
+
+    template <class EC, layout_type L, class SC, class Tag>
+    nlohmann::json mime_bundle_repr(const xarray_container<EC, L, SC, Tag>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    class xtensor_container;
+
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    nlohmann::json mime_bundle_repr(const xtensor_container<EC, N, L, Tag>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    class xfixed_container;
+
+    template <class ET, class S, layout_type L, bool SH, class Tag>
+    nlohmann::json mime_bundle_repr(const xfixed_container<ET, S, L, SH, Tag>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class F, class CT, class X, class O>
+    class xreducer;
+
+    template <class F, class CT, class X, class O>
+    nlohmann::json mime_bundle_repr(const xreducer<F, CT, X, O>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class VE, class FE>
+    class xoptional_assembly;
+
+    template <class VE, class FE>
+    nlohmann::json mime_bundle_repr(const xoptional_assembly<VE, FE>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class VEC, class FEC>
+    class xoptional_assembly_adaptor;
+
+    template <class VEC, class FEC>
+    nlohmann::json mime_bundle_repr(const xoptional_assembly_adaptor<VEC, FEC>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class CT>
+    class xscalar;
+
+    template <class CT>
+    nlohmann::json mime_bundle_repr(const xscalar<CT>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class CT, class X>
+    class xbroadcast;
+
+    template <class CT, class X>
+    nlohmann::json mime_bundle_repr(const xbroadcast<CT, X>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class F, class R, class S>
+    class xgenerator;
+
+    template <class F, class R, class S>
+    nlohmann::json mime_bundle_repr(const xgenerator<F, R, S>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class CT, class... S>
+    class xview;
+
+    template <class CT, class... S>
+    nlohmann::json mime_bundle_repr(const xview<CT, S...>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    class xstrided_view;
+
+    template <class CT, class S, layout_type L, class FST>
+    nlohmann::json mime_bundle_repr(const xstrided_view<CT, S, L, FST>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class CTD, class CTM>
+    class xmasked_view;
+
+    template <class CTD, class CTM>
+    nlohmann::json mime_bundle_repr(const xmasked_view<CTD, CTM>& expr)
+    {
+        return mime_bundle_repr_impl(expr);
+    }
+
+    template <class T, class B>
+    class xmasked_value;
+
+    // xmasked_value is a scalar-like value: stream it as plain text rather
+    // than building an HTML table.
+    template <class T, class B>
+    nlohmann::json mime_bundle_repr(const xmasked_value<T, B>& v)
+    {
+        auto bundle = nlohmann::json::object();
+        std::stringstream tmp;
+        tmp << v;
+        bundle["text/plain"] = tmp.str();
+        return bundle;
+    }
+}
+
+#endif

+ 131 - 0
3rd/numpy/include/xtensor/xmultiindex_iterator.hpp

@@ -0,0 +1,131 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_XMULTIINDEX_ITERATOR
+#define XTENSOR_XMULTIINDEX_ITERATOR
+
+#include "xstrided_view.hpp"
+#include "xtl/xsequence.hpp"
+
+namespace xt
+{
+
+    // Forward iterator over the multi-indices of an axis-aligned region
+    // [m_begin, m_end). Dereferencing yields the current index container;
+    // iterator equality is defined solely by the linear index counter, so
+    // the past-the-end iterator's stored index contents are irrelevant.
+    template <class S>
+    class xmultiindex_iterator
+    {
+    public:
+
+        using self_type = xmultiindex_iterator<S>;
+        using shape_type = S;
+
+        using value_type = shape_type;
+        using reference = value_type&;
+        using pointer = value_type*;
+        using difference_type = std::size_t;
+        using iterator_category = std::forward_iterator_tag;
+
+        xmultiindex_iterator() = default;
+
+        // begin/end bound the region, current is the starting multi-index,
+        // linear_index is its flat offset within the region
+        template <class B, class E, class C>
+        xmultiindex_iterator(B&& begin, E&& end, C&& current, const std::size_t linear_index)
+            : m_begin(std::forward<B>(begin))
+            , m_end(std::forward<E>(end))
+            , m_current(std::forward<C>(current))
+            , m_linear_index(linear_index)
+        {
+        }
+
+        // odometer-style increment: bump the last axis, carrying (reset to
+        // m_begin) into the next-outer axis whenever an axis would reach its
+        // end; the linear counter always advances by one
+        self_type& operator++()
+        {
+            std::size_t i = m_begin.size();
+            while (i != 0)
+            {
+                --i;
+                if (m_current[i] + 1u == m_end[i])
+                {
+                    m_current[i] = m_begin[i];
+                }
+                else
+                {
+                    m_current[i] += 1;
+                    break;
+                }
+            }
+            m_linear_index++;
+            return *this;
+        }
+
+        self_type operator++(int)
+        {
+            self_type it = *this;
+            ++(*this);
+            return it;
+        }
+
+        shape_type& operator*()
+        {
+            return m_current;
+        }
+
+        const shape_type& operator*() const
+        {
+            return m_current;
+        }
+
+        // only the flat position participates in comparison
+        bool operator==(const self_type& rhs) const
+        {
+            return m_linear_index == rhs.m_linear_index;
+        }
+
+        bool operator!=(const self_type& rhs) const
+        {
+            return !this->operator==(rhs);
+        }
+
+    private:
+
+        shape_type m_begin;
+        shape_type m_end;
+        shape_type m_current;
+        std::size_t m_linear_index{0};
+    };
+
+    // Build the begin iterator of a region: current index = roi_begin,
+    // linear index = 0.
+    template <class S, class B, class E>
+    auto multiindex_iterator_begin(B&& roi_begin, E&& roi_end)
+    {
+        S current;
+        resize_container(current, roi_begin.size());
+        std::copy(roi_begin.begin(), roi_begin.end(), current.begin());
+        return xmultiindex_iterator<S>(std::forward<B>(roi_begin), std::forward<E>(roi_end), std::move(current), 0);
+    }
+
+    // Build the past-the-end iterator: its linear index is the number of
+    // elements in the region (product of per-axis extents), which is the only
+    // field used by iterator comparison.
+    template <class S, class B, class E>
+    auto multiindex_iterator_end(B&& roi_begin, E&& roi_end)
+    {
+        S current;
+        resize_container(current, roi_begin.size());
+        std::copy(roi_end.begin(), roi_end.end(), current.begin());
+
+        std::size_t linear_index = 1;
+        for (std::size_t i = 0; i < roi_begin.size(); ++i)
+        {
+            linear_index *= roi_end[i] - roi_begin[i];
+        }
+
+        return xmultiindex_iterator<S>(
+            std::forward<B>(roi_begin),
+            std::forward<E>(roi_end),
+            std::move(current),
+            linear_index
+        );
+    }
+
+}
+
+#endif

+ 230 - 0
3rd/numpy/include/xtensor/xnoalias.hpp

@@ -0,0 +1,230 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_NOALIAS_HPP
+#define XTENSOR_NOALIAS_HPP
+
+#include "xsemantic.hpp"
+
+namespace xt
+{
+
+    // Proxy returned by xt::noalias. Each (computed-)assignment operator
+    // forwards straight to the named assign method of the wrapped expression
+    // (see the implementations below), i.e. `noalias(a) = e` assigns without
+    // the aliasing protection of the regular operators.
+    template <class A>
+    class noalias_proxy
+    {
+    public:
+
+        noalias_proxy(A a) noexcept;
+
+        // scalar right-hand sides: enabled only when E is NOT an xexpression
+        template <class E>
+        disable_xexpression<E, A> operator=(const E&);
+
+        template <class E>
+        disable_xexpression<E, A> operator+=(const E&);
+
+        template <class E>
+        disable_xexpression<E, A> operator-=(const E&);
+
+        template <class E>
+        disable_xexpression<E, A> operator*=(const E&);
+
+        template <class E>
+        disable_xexpression<E, A> operator/=(const E&);
+
+        template <class E>
+        disable_xexpression<E, A> operator%=(const E&);
+
+        template <class E>
+        disable_xexpression<E, A> operator&=(const E&);
+
+        template <class E>
+        disable_xexpression<E, A> operator|=(const E&);
+
+        template <class E>
+        disable_xexpression<E, A> operator^=(const E&);
+
+        // xexpression right-hand sides
+        template <class E>
+        A operator=(const xexpression<E>& e);
+
+        template <class E>
+        A operator+=(const xexpression<E>& e);
+
+        template <class E>
+        A operator-=(const xexpression<E>& e);
+
+        template <class E>
+        A operator*=(const xexpression<E>& e);
+
+        template <class E>
+        A operator/=(const xexpression<E>& e);
+
+        template <class E>
+        A operator%=(const xexpression<E>& e);
+
+        template <class E>
+        A operator&=(const xexpression<E>&);
+
+        template <class E>
+        A operator|=(const xexpression<E>&);
+
+        template <class E>
+        A operator^=(const xexpression<E>&);
+
+    private:
+
+        // closure on the assignee: a value or a reference depending on A
+        A m_array;
+    };
+
+    template <class A>
+    noalias_proxy<xtl::closure_type_t<A>> noalias(A&& a) noexcept;
+
+    /********************************
+     * noalias_proxy implementation *
+     ********************************/
+
+    // Store the closure to the assignee (value or reference, see noalias()).
+    template <class A>
+    inline noalias_proxy<A>::noalias_proxy(A a) noexcept
+        : m_array(std::forward<A>(a))
+    {
+    }
+
+    // Scalar overloads: plain assignment wraps the scalar in an xscalar,
+    // every computed assignment routes to scalar_computed_assign with the
+    // matching std functor.
+    template <class A>
+    template <class E>
+    inline auto noalias_proxy<A>::operator=(const E& e) -> disable_xexpression<E, A>
+    {
+        return m_array.assign(xscalar<E>(e));
+    }
+
+    template <class A>
+    template <class E>
+    inline auto noalias_proxy<A>::operator+=(const E& e) -> disable_xexpression<E, A>
+    {
+        return m_array.scalar_computed_assign(e, std::plus<>());
+    }
+
+    template <class A>
+    template <class E>
+    inline auto noalias_proxy<A>::operator-=(const E& e) -> disable_xexpression<E, A>
+    {
+        return m_array.scalar_computed_assign(e, std::minus<>());
+    }
+
+    template <class A>
+    template <class E>
+    inline auto noalias_proxy<A>::operator*=(const E& e) -> disable_xexpression<E, A>
+    {
+        return m_array.scalar_computed_assign(e, std::multiplies<>());
+    }
+
+    template <class A>
+    template <class E>
+    inline auto noalias_proxy<A>::operator/=(const E& e) -> disable_xexpression<E, A>
+    {
+        return m_array.scalar_computed_assign(e, std::divides<>());
+    }
+
+    template <class A>
+    template <class E>
+    inline auto noalias_proxy<A>::operator%=(const E& e) -> disable_xexpression<E, A>
+    {
+        return m_array.scalar_computed_assign(e, std::modulus<>());
+    }
+
+    template <class A>
+    template <class E>
+    inline auto noalias_proxy<A>::operator&=(const E& e) -> disable_xexpression<E, A>
+    {
+        return m_array.scalar_computed_assign(e, std::bit_and<>());
+    }
+
+    template <class A>
+    template <class E>
+    inline auto noalias_proxy<A>::operator|=(const E& e) -> disable_xexpression<E, A>
+    {
+        return m_array.scalar_computed_assign(e, std::bit_or<>());
+    }
+
+    template <class A>
+    template <class E>
+    inline auto noalias_proxy<A>::operator^=(const E& e) -> disable_xexpression<E, A>
+    {
+        return m_array.scalar_computed_assign(e, std::bit_xor<>());
+    }
+
+    // Expression overloads: each operator forwards to the corresponding
+    // named assign method of the wrapped semantic (assign, plus_assign, ...),
+    // which performs the assignment without an aliasing temporary.
+    template <class A>
+    template <class E>
+    inline A noalias_proxy<A>::operator=(const xexpression<E>& e)
+    {
+        return m_array.assign(e);
+    }
+
+    template <class A>
+    template <class E>
+    inline A noalias_proxy<A>::operator+=(const xexpression<E>& e)
+    {
+        return m_array.plus_assign(e);
+    }
+
+    template <class A>
+    template <class E>
+    inline A noalias_proxy<A>::operator-=(const xexpression<E>& e)
+    {
+        return m_array.minus_assign(e);
+    }
+
+    template <class A>
+    template <class E>
+    inline A noalias_proxy<A>::operator*=(const xexpression<E>& e)
+    {
+        return m_array.multiplies_assign(e);
+    }
+
+    template <class A>
+    template <class E>
+    inline A noalias_proxy<A>::operator/=(const xexpression<E>& e)
+    {
+        return m_array.divides_assign(e);
+    }
+
+    template <class A>
+    template <class E>
+    inline A noalias_proxy<A>::operator%=(const xexpression<E>& e)
+    {
+        return m_array.modulus_assign(e);
+    }
+
+    template <class A>
+    template <class E>
+    inline A noalias_proxy<A>::operator&=(const xexpression<E>& e)
+    {
+        return m_array.bit_and_assign(e);
+    }
+
+    template <class A>
+    template <class E>
+    inline A noalias_proxy<A>::operator|=(const xexpression<E>& e)
+    {
+        return m_array.bit_or_assign(e);
+    }
+
+    template <class A>
+    template <class E>
+    inline A noalias_proxy<A>::operator^=(const xexpression<E>& e)
+    {
+        return m_array.bit_xor_assign(e);
+    }
+
+    // Wrap an expression in a noalias_proxy, keeping a value or a reference
+    // according to xtl::closure_type_t<A>.
+    template <class A>
+    inline noalias_proxy<xtl::closure_type_t<A>> noalias(A&& a) noexcept
+    {
+        return noalias_proxy<xtl::closure_type_t<A>>(a);
+    }
+}
+
+#endif

+ 661 - 0
3rd/numpy/include/xtensor/xnorm.hpp

@@ -0,0 +1,661 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) Ullrich Koethe
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_NORM_HPP
+#define XTENSOR_NORM_HPP
+
+#include <cmath>
+// std::abs(int) prior to C++ 17
+#include <complex>
+#include <cstdlib>
+
+#include <xtl/xtype_traits.hpp>
+
+#include "xmath.hpp"
+#include "xoperation.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+    /********************************************
+     * type inference for norm and squared norm *
+     ********************************************/
+
+    // forward declarations: the public traits are defined after the
+    // traits_detail machinery below
+    template <class T>
+    struct norm_type;
+
+    template <class T>
+    struct squared_norm_type;
+
+    namespace traits_detail
+    {
+
+        // norm type of a scalar: the promoted arithmetic type when T is
+        // arithmetic, a void* sentinel (value == false) otherwise
+        template <class T, bool scalar = xtl::is_arithmetic<T>::value>
+        struct norm_of_scalar_impl;
+
+        template <class T>
+        struct norm_of_scalar_impl<T, false>
+        {
+            static const bool value = false;
+            using norm_type = void*;
+            using squared_norm_type = void*;
+        };
+
+        template <class T>
+        struct norm_of_scalar_impl<T, true>
+        {
+            static const bool value = true;
+            using norm_type = xtl::promote_type_t<T>;
+            using squared_norm_type = xtl::promote_type_t<T>;
+        };
+
+        // norm types for a container's elements, dispatched on whether the
+        // element type is integral / floating point / neither
+        template <class T, bool integral = xtl::is_integral<T>::value, bool floating = std::is_floating_point<T>::value>
+        struct norm_of_array_elements_impl;
+
+        // void* sentinel (no value_type detected): propagate the sentinel
+        template <>
+        struct norm_of_array_elements_impl<void*, false, false>
+        {
+            using norm_type = void*;
+            using squared_norm_type = void*;
+        };
+
+        // non-arithmetic elements: recurse into the element's own traits
+        template <class T>
+        struct norm_of_array_elements_impl<T, false, false>
+        {
+            using norm_type = typename norm_type<T>::type;
+            using squared_norm_type = typename squared_norm_type<T>::type;
+        };
+
+        // integral elements: double norm, uint64_t squared norm to limit
+        // overflow when accumulating squares
+        template <class T>
+        struct norm_of_array_elements_impl<T, true, false>
+        {
+            static_assert(
+                !std::is_same<T, char>::value,
+                "'char' is not a numeric type, use 'signed char' or 'unsigned char'."
+            );
+
+            using norm_type = double;
+            using squared_norm_type = uint64_t;
+        };
+
+        // floating-point elements: double, except long double which is kept
+        template <class T>
+        struct norm_of_array_elements_impl<T, false, true>
+        {
+            using norm_type = double;
+            using squared_norm_type = double;
+        };
+
+        template <>
+        struct norm_of_array_elements_impl<long double, false, true>
+        {
+            using norm_type = long double;
+            using squared_norm_type = long double;
+        };
+
+        // detection idiom: test(U*) is preferred when U::value_type exists,
+        // otherwise the variadic overload yields the void* sentinel
+        template <class ARRAY>
+        struct norm_of_vector_impl
+        {
+            static void* test(...);
+
+            template <class U>
+            static typename U::value_type test(U*, typename U::value_type* = 0);
+
+            using T = decltype(test(std::declval<ARRAY*>()));
+
+            static const bool value = !std::is_same<T, void*>::value;
+
+            using norm_type = typename norm_of_array_elements_impl<T>::norm_type;
+            using squared_norm_type = typename norm_of_array_elements_impl<T>::squared_norm_type;
+        };
+
+        // common base of the public traits: probes T both as a scalar and as
+        // a container, and rejects types that are neither
+        template <class U>
+        struct norm_type_base
+        {
+            using T = std::decay_t<U>;
+
+            static_assert(
+                !std::is_same<T, char>::value,
+                "'char' is not a numeric type, use 'signed char' or 'unsigned char'."
+            );
+
+            using norm_of_scalar = norm_of_scalar_impl<T>;
+            using norm_of_vector = norm_of_vector_impl<T>;
+
+            static const bool value = norm_of_scalar::value || norm_of_vector::value;
+
+            static_assert(value, "norm_type<T> are undefined for type U.");
+        };
+    }  // namespace traits_detail
+
+    /**
+     * @brief Traits class for the result type of the <tt>norm_l2()</tt> function.
+     *
+     * Member 'type' defines the result of <tt>norm_l2(t)</tt>, where <tt>t</tt>
+     * is of type @tparam T. It implements the following rules designed to
+     * minimize the potential for overflow:
+     *   - @tparam T is an arithmetic type: 'type' is the result type of <tt>abs(t)</tt>.
+     *   - @tparam T is a container of 'long double' elements: 'type' is <tt>long double</tt>.
+     *   - @tparam T is a container of another arithmetic type: 'type' is <tt>double</tt>.
+     *   - @tparam T is a container of some other type: 'type' is the element's norm type.
+     *
+     * Containers are recognized by having an embedded typedef 'value_type'.
+     * To change the behavior for a case not covered here, specialize the
+     * <tt>traits_detail::norm_type_base</tt> template.
+     */
+    template <class T>
+    struct norm_type : traits_detail::norm_type_base<T>
+    {
+        using base_type = traits_detail::norm_type_base<T>;
+
+        // container case wins when both probes succeed
+        using type = typename std::conditional<
+            base_type::norm_of_vector::value,
+            typename base_type::norm_of_vector::norm_type,
+            typename base_type::norm_of_scalar::norm_type>::type;
+    };
+
+    /**
+     * Abbreviation of 'typename norm_type<T>::type'.
+     */
+    template <class T>
+    using norm_type_t = typename norm_type<T>::type;
+
+    /**
+     * @brief Traits class for the result type of the <tt>norm_sq()</tt> function.
+     *
+     * Member 'type' defines the result of <tt>norm_sq(t)</tt>, where <tt>t</tt>
+     * is of type @tparam T. It implements the following rules designed to
+     * minimize the potential for overflow:
+     *   - @tparam T is an arithmetic type: 'type' is the result type of <tt>t*t</tt>.
+     *   - @tparam T is a container of 'long double' elements: 'type' is <tt>long double</tt>.
+     *   - @tparam T is a container of another floating-point type: 'type' is <tt>double</tt>.
+     *   - @tparam T is a container of integer elements: 'type' is <tt>uint64_t</tt>.
+     *   - @tparam T is a container of some other type: 'type' is the element's squared norm type.
+     *
+     *  Containers are recognized by having an embedded typedef 'value_type'.
+     *  To change the behavior for a case not covered here, specialize the
+     *  <tt>traits_detail::norm_type_base</tt> template.
+     */
+    template <class T>
+    struct squared_norm_type : traits_detail::norm_type_base<T>
+    {
+        using base_type = traits_detail::norm_type_base<T>;
+
+        // container case wins when both probes succeed
+        using type = typename std::conditional<
+            base_type::norm_of_vector::value,
+            typename base_type::norm_of_vector::squared_norm_type,
+            typename base_type::norm_of_scalar::squared_norm_type>::type;
+    };
+
+    /**
+     * Abbreviation of 'typename squared_norm_type<T>::type'.
+     */
+    template <class T>
+    using squared_norm_type_t = typename squared_norm_type<T>::type;
+
+    /*************************************
+     * norm functions for built-in types *
+     *************************************/
+
///@cond DOXYGEN_INCLUDE_SFINAE
// Generates the scalar norm overload set for a built-in signed type T:
//   - norm_l0(t)         counts nonzero values, i.e. yields 0 or 1,
//   - norm_l1/l2/linf(t) all reduce to std::abs(t) for a scalar,
//   - norm_sq(t)         is t * t (may overflow for large integer t),
//   - norm_lp_to_p(t, p) is |t|^p, or the nonzero count when p == 0,
//   - norm_lp(t, p)      is |t| for any p != 0, or the nonzero count for p == 0.
#define XTENSOR_DEFINE_SIGNED_NORMS(T)                                                \
    inline auto norm_lp(T t, double p) noexcept                                       \
    {                                                                                 \
        using rt = decltype(std::abs(t));                                             \
        return p == 0.0 ? static_cast<rt>(t != 0) : std::abs(t);                      \
    }                                                                                 \
    inline auto norm_lp_to_p(T t, double p) noexcept                                  \
    {                                                                                 \
        using rt = xtl::real_promote_type_t<T>;                                       \
        return p == 0.0 ? static_cast<rt>(t != 0)                                     \
                        : std::pow(static_cast<rt>(std::abs(t)), static_cast<rt>(p)); \
    }                                                                                 \
    inline std::size_t norm_l0(T t) noexcept                                          \
    {                                                                                 \
        return (t != 0);                                                              \
    }                                                                                 \
    inline auto norm_l1(T t) noexcept                                                 \
    {                                                                                 \
        return std::abs(t);                                                           \
    }                                                                                 \
    inline auto norm_l2(T t) noexcept                                                 \
    {                                                                                 \
        return std::abs(t);                                                           \
    }                                                                                 \
    inline auto norm_linf(T t) noexcept                                               \
    {                                                                                 \
        return std::abs(t);                                                           \
    }                                                                                 \
    inline auto norm_sq(T t) noexcept                                                 \
    {                                                                                 \
        return t * t;                                                                 \
    }

    // Instantiate the overload set for every built-in signed arithmetic type.
    XTENSOR_DEFINE_SIGNED_NORMS(signed char)
    XTENSOR_DEFINE_SIGNED_NORMS(short)
    XTENSOR_DEFINE_SIGNED_NORMS(int)
    XTENSOR_DEFINE_SIGNED_NORMS(long)
    XTENSOR_DEFINE_SIGNED_NORMS(long long)
    XTENSOR_DEFINE_SIGNED_NORMS(float)
    XTENSOR_DEFINE_SIGNED_NORMS(double)
    XTENSOR_DEFINE_SIGNED_NORMS(long double)

#undef XTENSOR_DEFINE_SIGNED_NORMS
+
// Same overload set as above for the unsigned types; std::abs is not needed
// (and would be ill-formed/ambiguous), the value is already its own magnitude.
#define XTENSOR_DEFINE_UNSIGNED_NORMS(T)                                                              \
    inline T norm_lp(T t, double p) noexcept                                                          \
    {                                                                                                 \
        return p == 0.0 ? (t != 0) : t;                                                               \
    }                                                                                                 \
    inline auto norm_lp_to_p(T t, double p) noexcept                                                  \
    {                                                                                                 \
        using rt = xtl::real_promote_type_t<T>;                                                       \
        return p == 0.0 ? static_cast<rt>(t != 0) : std::pow(static_cast<rt>(t), static_cast<rt>(p)); \
    }                                                                                                 \
    inline T norm_l0(T t) noexcept                                                                    \
    {                                                                                                 \
        return t != 0 ? 1 : 0;                                                                        \
    }                                                                                                 \
    inline T norm_l1(T t) noexcept                                                                    \
    {                                                                                                 \
        return t;                                                                                     \
    }                                                                                                 \
    inline T norm_l2(T t) noexcept                                                                    \
    {                                                                                                 \
        return t;                                                                                     \
    }                                                                                                 \
    inline T norm_linf(T t) noexcept                                                                  \
    {                                                                                                 \
        return t;                                                                                     \
    }                                                                                                 \
    inline auto norm_sq(T t) noexcept                                                                 \
    {                                                                                                 \
        return t * t;                                                                                 \
    }

    // Instantiate the overload set for every built-in unsigned integer type.
    XTENSOR_DEFINE_UNSIGNED_NORMS(unsigned char)
    XTENSOR_DEFINE_UNSIGNED_NORMS(unsigned short)
    XTENSOR_DEFINE_UNSIGNED_NORMS(unsigned int)
    XTENSOR_DEFINE_UNSIGNED_NORMS(unsigned long)
    XTENSOR_DEFINE_UNSIGNED_NORMS(unsigned long long)

#undef XTENSOR_DEFINE_UNSIGNED_NORMS
+
+    /***********************************
+     * norm functions for std::complex *
+     ***********************************/
+
+    /**
+     * @brief L0 pseudo-norm of a complex number.
+     * Equivalent to <tt>t != 0</tt>.
+     */
+    template <class T>
+    inline uint64_t norm_l0(const std::complex<T>& t) noexcept
+    {
+        return t.real() != 0 || t.imag() != 0;
+    }
+
+    /**
+     * @brief L1 norm of a complex number.
+     */
+    template <class T>
+    inline auto norm_l1(const std::complex<T>& t) noexcept
+    {
+        return std::abs(t.real()) + std::abs(t.imag());
+    }
+
+    /**
+     * @brief L2 norm of a complex number.
+     * Equivalent to <tt>std::abs(t)</tt>.
+     */
+    template <class T>
+    inline auto norm_l2(const std::complex<T>& t) noexcept
+    {
+        return std::abs(t);
+    }
+
+    /**
+     * @brief Squared norm of a complex number.
+     * Equivalent to <tt>std::norm(t)</tt> (yes, the C++ standard really defines
+     * <tt>norm()</tt> to compute the squared norm).
+     */
+    template <class T>
+    inline auto norm_sq(const std::complex<T>& t) noexcept
+    {
+        // Does not use std::norm since it returns a std::complex on OSX
+        return t.real() * t.real() + t.imag() * t.imag();
+    }
+
+    /**
+     * @brief L-infinity norm of a complex number.
+     */
+    template <class T>
+    inline auto norm_linf(const std::complex<T>& t) noexcept
+    {
+        return (std::max)(std::abs(t.real()), std::abs(t.imag()));
+    }
+
    /**
     * @brief p-th power of the Lp norm of a complex number.
     *
     * Computes |real|^p + |imag|^p; when p == 0 it degenerates to the
     * L0 pseudo-norm (the nonzero count, 0 or 1).
     */
    template <class T>
    inline auto norm_lp_to_p(const std::complex<T>& t, double p) noexcept
    {
        // The result type is whatever std::pow yields for the component type;
        // both branches of the ternary below must agree on it.
        using rt = decltype(std::pow(std::abs(t.real()), static_cast<T>(p)));
        return p == 0 ? static_cast<rt>(t.real() != 0 || t.imag() != 0)
                      : std::pow(std::abs(t.real()), static_cast<T>(p))
                            + std::pow(std::abs(t.imag()), static_cast<T>(p));
    }
+
    /**
     * @brief Lp norm of a complex number.
     *
     * Evaluates (|real|^p + |imag|^p)^(1/p); when p == 0 this falls back to
     * the L0 pseudo-norm (nonzero count) instead of taking a 1/0 root.
     */
    template <class T>
    inline auto norm_lp(const std::complex<T>& t, double p) noexcept
    {
        return p == 0 ? norm_lp_to_p(t, p) : std::pow(norm_lp_to_p(t, p), 1.0 / p);
    }
+
+    /***********************************
+     * norm functions for xexpressions *
+     ***********************************/
+
// Generates an overload of NAME accepting the axes as a braced initializer
// list (e.g. NAME(e, {0, 1})), converting it to a std::array and forwarding
// to the generic sequence overload.
#define XTENSOR_NORM_FUNCTION_AXES(NAME)                                                             \
    template <class E, class I, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>                \
    inline auto NAME(E&& e, const I(&axes)[N], EVS es = EVS()) noexcept                              \
    {                                                                                                \
        using axes_type = std::array<typename std::decay_t<E>::size_type, N>;                        \
        return NAME(std::forward<E>(e), xtl::forward_sequence<axes_type, decltype(axes)>(axes), es); \
    }
+
    namespace detail
    {
        // Maps an element type to the type of its norm value: the identity
        // for real types, and the underlying real type for std::complex
        // (a norm is always real-valued).
        template <class T>
        struct norm_value_type
        {
            using type = T;
        };

        template <class T>
        struct norm_value_type<std::complex<T>>
        {
            using type = T;
        };

        // Abbreviation of 'typename norm_value_type<T>::type'.
        template <class T>
        using norm_value_type_t = typename norm_value_type<T>::type;
    }
+
// XTENSOR_EMPTY / XTENSOR_COMMA are placeholder tokens passed to
// XTENSOR_NORM_FUNCTION so that a macro argument can expand to "nothing"
// or to a literal ',' inside the generated reduction expression.
#define XTENSOR_EMPTY
#define XTENSOR_COMMA ,
// Generates the xexpression overloads of a norm reducer NAME:
//   NAME(e, axes, es) - reduction over the given axes,
//   NAME(e, es)       - reduction over all axes,
// plus the initializer-list axes overload via XTENSOR_NORM_FUNCTION_AXES.
// The per-element scalar overloads of NAME defined above are combined with
// REDUCE_EXPR/REDUCE_OP (accumulation) and MERGE_FUNC (merging of partial
// results) into an xreducer functor.
#define XTENSOR_NORM_FUNCTION(NAME, RESULT_TYPE, REDUCE_EXPR, REDUCE_OP, MERGE_FUNC)                                       \
    template <class E, class X, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(xtl::negation<is_reducer_options<X>>)> \
    inline auto NAME(E&& e, X&& axes, EVS es = EVS()) noexcept                                                             \
    {                                                                                                                      \
        using value_type = typename std::decay_t<E>::value_type;                                                           \
        using result_type = detail::norm_value_type_t<RESULT_TYPE>;                                                        \
                                                                                                                           \
        auto reduce_func = [](result_type const& r, value_type const& v)                                                   \
        {                                                                                                                  \
            return REDUCE_EXPR(r REDUCE_OP NAME(v));                                                                       \
        };                                                                                                                 \
                                                                                                                           \
        return xt::reduce(                                                                                                 \
            make_xreducer_functor(std::move(reduce_func), const_value<result_type>(0), MERGE_FUNC<result_type>()),         \
            std::forward<E>(e),                                                                                            \
            std::forward<X>(axes),                                                                                         \
            es                                                                                                             \
        );                                                                                                                 \
    }                                                                                                                      \
                                                                                                                           \
    template <class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_xexpression<E>)>                             \
    inline auto NAME(E&& e, EVS es = EVS()) noexcept                                                                       \
    {                                                                                                                      \
        return NAME(std::forward<E>(e), arange(e.dimension()), es);                                                        \
    }                                                                                                                      \
    XTENSOR_NORM_FUNCTION_AXES(NAME)

    // Instantiate the reducers; norm_linf merges partial results with the
    // maximum instead of accumulating a sum.
    XTENSOR_NORM_FUNCTION(norm_l0, unsigned long long, XTENSOR_EMPTY, +, std::plus)
    XTENSOR_NORM_FUNCTION(norm_l1, xtl::big_promote_type_t<value_type>, XTENSOR_EMPTY, +, std::plus)
    XTENSOR_NORM_FUNCTION(norm_sq, xtl::big_promote_type_t<value_type>, XTENSOR_EMPTY, +, std::plus)
    XTENSOR_NORM_FUNCTION(
        norm_linf,
        decltype(norm_linf(std::declval<value_type>())),
        (std::max<result_type>),
        XTENSOR_COMMA,
        math::maximum
    )

#undef XTENSOR_EMPTY
#undef XTENSOR_COMMA
#undef XTENSOR_NORM_FUNCTION
#undef XTENSOR_NORM_FUNCTION_AXES
+    /// @endcond
    /**
     * @ingroup red_functions
     * @brief L0 (count) pseudo-norm of an array-like argument over given axes.
     *
     * Returns an \ref xreducer for the L0 pseudo-norm of the elements across given \em axes.
     * @param e an \ref xexpression
     * @param axes the axes along which the norm is computed (optional)
     * @param es evaluation strategy to use (lazy (default), or immediate)
     * @return an \ref xreducer (or xcontainer, depending on evaluation strategy)
     * When no axes are provided, the norm is calculated over the entire array. In this case,
     * the reducer represents a scalar result, otherwise an array of appropriate dimension.
     * @note Declaration for documentation purposes only; the definition is
     * generated above by the XTENSOR_NORM_FUNCTION macro.
     */
    template <class E, class X, class EVS, class>
    auto norm_l0(E&& e, X&& axes, EVS es) noexcept;

    /**
     * @ingroup red_functions
     * @brief L1 norm of an array-like argument over given axes.
     *
     * Returns an \ref xreducer for the L1 norm of the elements across given \em axes.
     * @param e an \ref xexpression
     * @param axes the axes along which the norm is computed (optional)
     * @param es evaluation strategy to use (lazy (default), or immediate)
     * @return an \ref xreducer (or xcontainer, depending on evaluation strategy)
     * When no axes are provided, the norm is calculated over the entire array. In this case,
     * the reducer represents a scalar result, otherwise an array of appropriate dimension.
     * @note Declaration for documentation purposes only; the definition is
     * generated above by the XTENSOR_NORM_FUNCTION macro.
     */
    template <class E, class X, class EVS, class>
    auto norm_l1(E&& e, X&& axes, EVS es) noexcept;

    /**
     * @ingroup red_functions
     * @brief Squared L2 norm of an array-like argument over given axes.
     *
     * Returns an \ref xreducer for the squared L2 norm of the elements across given \em axes.
     * @param e an \ref xexpression
     * @param axes the axes along which the norm is computed (optional)
     * @param es evaluation strategy to use (lazy (default), or immediate)
     * @return an \ref xreducer (or xcontainer, depending on evaluation strategy)
     * When no axes are provided, the norm is calculated over the entire array. In this case,
     * the reducer represents a scalar result, otherwise an array of appropriate dimension.
     * @note Declaration for documentation purposes only; the definition is
     * generated above by the XTENSOR_NORM_FUNCTION macro.
     */
    template <class E, class X, class EVS, class>
    auto norm_sq(E&& e, X&& axes, EVS es) noexcept;
+
    /**
     * @ingroup red_functions
     * @brief L2 norm of a scalar or array-like argument.
     * @param e an xexpression
     * @param es evaluation strategy to use (lazy (default), or immediate)
     * @return the square root of the full squared-norm reduction of \p e
     *  For scalar types: implemented as <tt>abs(t)</tt><br>
     *  otherwise: implemented as <tt>sqrt(norm_sq(t))</tt>.
     */
    template <class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_xexpression<E>)>
    inline auto norm_l2(E&& e, EVS es = EVS()) noexcept
    {
        // Unqualified call with std::sqrt in scope: ADL may select an
        // xtensor overload for expression results, std::sqrt for scalars.
        using std::sqrt;
        return sqrt(norm_sq(std::forward<E>(e), es));
    }
+
    /**
     * @ingroup red_functions
     * @brief L2 norm of an array-like argument over given axes.
     *
     * Returns an \ref xreducer for the L2 norm of the elements across given \em axes.
     * @param e an \ref xexpression
     * @param es evaluation strategy to use (lazy (default), or immediate)
     * @param axes the axes along which the norm is computed
     * @return an \ref xreducer (specifically: <tt>sqrt(norm_sq(e, axes))</tt>) (or xcontainer, depending on
     * evaluation strategy)
     */
    template <
        class E,
        class X,
        class EVS = DEFAULT_STRATEGY_REDUCERS,
        XTL_REQUIRES(is_xexpression<E>, xtl::negation<is_reducer_options<X>>)>
    inline auto norm_l2(E&& e, X&& axes, EVS es = EVS()) noexcept
    {
        return sqrt(norm_sq(std::forward<E>(e), std::forward<X>(axes), es));
    }

    // Overload accepting the axes as a braced initializer list,
    // e.g. norm_l2(e, {0, 1}).
    template <class E, class I, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
    inline auto norm_l2(E&& e, const I (&axes)[N], EVS es = EVS()) noexcept
    {
        using axes_type = std::array<typename std::decay_t<E>::size_type, N>;
        return sqrt(norm_sq(std::forward<E>(e), xtl::forward_sequence<axes_type, decltype(axes)>(axes), es));
    }
+
    /**
     * @ingroup red_functions
     * @brief Infinity (maximum) norm of an array-like argument over given axes.
     *
     * Returns an \ref xreducer for the infinity norm of the elements across given \em axes.
     * @param e an \ref xexpression
     * @param axes the axes along which the norm is computed (optional)
     * @param es evaluation strategy to use (lazy (default), or immediate)
     * @return an \ref xreducer (or xcontainer, depending on evaluation strategy)
     * When no axes are provided, the norm is calculated over the entire array. In this case,
     * the reducer represents a scalar result, otherwise an array of appropriate dimension.
     * @note Declaration for documentation purposes only; the definition is
     * generated above by the XTENSOR_NORM_FUNCTION macro.
     */
    template <class E, class X, class EVS, class>
    auto norm_linf(E&& e, X&& axes, EVS es) noexcept;
+
    /**
     * @ingroup red_functions
     * @brief p-th power of the Lp norm of an array-like argument over given axes.
     *
     * Returns an \ref xreducer for the p-th power of the Lp norm of the elements across given \em axes.
     * @param e an \ref xexpression
     * @param p
     * @param axes the axes along which the norm is computed (optional)
     * @param es evaluation strategy to use (lazy (default), or immediate)
     * @return an \ref xreducer (or xcontainer, depending on evaluation strategy)
     * When no axes are provided, the norm is calculated over the entire array. In this case,
     * the reducer represents a scalar result, otherwise an array of appropriate dimension.
     */
    template <class E, class X, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(xtl::negation<is_reducer_options<X>>)>
    inline auto norm_lp_to_p(E&& e, double p, X&& axes, EVS es = EVS()) noexcept
    {
        using value_type = typename std::decay_t<E>::value_type;
        // norm_type_t picks an accumulator type that minimizes overflow risk.
        using result_type = norm_type_t<std::decay_t<E>>;

        // Accumulate |v|^p over the reduced axes; p is captured by value.
        auto reduce_func = [p](const result_type& r, const value_type& v)
        {
            return r + norm_lp_to_p(v, p);
        };
        return xt::reduce(
            make_xreducer_functor(std::move(reduce_func), xt::const_value<result_type>(0), std::plus<result_type>()),
            std::forward<E>(e),
            std::forward<X>(axes),
            es
        );
    }

    // Overload reducing over all axes of the expression.
    template <class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_xexpression<E>)>
    inline auto norm_lp_to_p(E&& e, double p, EVS es = EVS()) noexcept
    {
        return norm_lp_to_p(std::forward<E>(e), p, arange(e.dimension()), es);
    }

    // Overload accepting the axes as a braced initializer list.
    template <class E, class I, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
    inline auto norm_lp_to_p(E&& e, double p, const I (&axes)[N], EVS es = EVS()) noexcept
    {
        using axes_type = std::array<typename std::decay_t<E>::size_type, N>;
        return norm_lp_to_p(std::forward<E>(e), p, xtl::forward_sequence<axes_type, decltype(axes)>(axes), es);
    }
+
    /**
     * @ingroup red_functions
     * @brief Lp norm of an array-like argument over given axes.
     *
     * Returns an \ref xreducer for the Lp norm (p != 0) of the elements across given \em axes.
     * @param e an \ref xexpression
     * @param p
     * @param axes the axes along which the norm is computed (optional)
     * @param es evaluation strategy to use (lazy (default), or immediate)
     * @return an \ref xreducer (or xcontainer, depending on evaluation strategy)
     * When no axes are provided, the norm is calculated over the entire array. In this case,
     * the reducer represents a scalar result, otherwise an array of appropriate dimension.
     */
    template <class E, class X, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(xtl::negation<is_reducer_options<X>>)>
    inline auto norm_lp(E&& e, double p, X&& axes, EVS es = EVS())
    {
        // p == 0 would require a 1/0 root; the L0 pseudo-norm handles it.
        XTENSOR_PRECONDITION(p != 0, "norm_lp(): p must be nonzero, use norm_l0() instead.");
        return pow(norm_lp_to_p(std::forward<E>(e), p, std::forward<X>(axes), es), 1.0 / p);
    }

    // Overload reducing over all axes of the expression.
    template <class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_xexpression<E>)>
    inline auto norm_lp(E&& e, double p, EVS es = EVS())
    {
        return norm_lp(std::forward<E>(e), p, arange(e.dimension()), es);
    }

    // Overload accepting the axes as a braced initializer list.
    template <class E, class I, std::size_t N, class EVS = DEFAULT_STRATEGY_REDUCERS>
    inline auto norm_lp(E&& e, double p, const I (&axes)[N], EVS es = EVS())
    {
        using axes_type = std::array<typename std::decay_t<E>::size_type, N>;
        return norm_lp(std::forward<E>(e), p, xtl::forward_sequence<axes_type, decltype(axes)>(axes), es);
    }
+
    /**
     * @ingroup red_functions
     * @brief Induced L1 norm of a matrix.
     *
     * Returns an \ref xreducer for the induced L1 norm (i.e. the maximum of the L1 norms of e's columns).
     * @param e a 2D \ref xexpression
     * @param es evaluation strategy to use (lazy (default), or immediate)
     * @return an \ref xreducer (or xcontainer, depending on evaluation strategy)
     */
    template <class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_xexpression<E>)>
    inline auto norm_induced_l1(E&& e, EVS es = EVS())
    {
        XTENSOR_PRECONDITION(
            e.dimension() == 2,
            "norm_induced_l1(): only applicable to matrices (e.dimension() must be 2)."
        );
        // Column L1 norms (axis 0 reduces over rows), then their maximum.
        return norm_linf(norm_l1(std::forward<E>(e), {0}, es), es);
    }
+
    /**
     * @ingroup red_functions
     * @brief Induced L-infinity norm of a matrix.
     *
     * Returns an \ref xreducer for the induced L-infinity norm (i.e. the maximum of the L1 norms of e's
     * rows).
     * @param e a 2D \ref xexpression
     * @param es evaluation strategy to use (lazy (default), or immediate)
     * @return an \ref xreducer (or xcontainer, depending on evaluation strategy)
     */
    template <class E, class EVS = DEFAULT_STRATEGY_REDUCERS, XTL_REQUIRES(is_xexpression<E>)>
    inline auto norm_induced_linf(E&& e, EVS es = EVS())
    {
        XTENSOR_PRECONDITION(
            e.dimension() == 2,
            "norm_induced_linf(): only applicable to matrices (e.dimension() must be 2)."
        );
        // Row L1 norms (axis 1 reduces over columns), then their maximum.
        return norm_linf(norm_l1(std::forward<E>(e), {1}, es), es);
    }
+
+}  // namespace xt
+
+#endif

+ 803 - 0
3rd/numpy/include/xtensor/xnpy.hpp

@@ -0,0 +1,803 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright Leon Merten Lohse
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_NPY_HPP
+#define XTENSOR_NPY_HPP
+
+// Derived from https://github.com/llohse/libnpy by Leon Merten Lohse,
+// relicensed from MIT License with permission
+
+#include <algorithm>
+#include <complex>
+#include <cstdint>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <regex>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <typeinfo>
+#include <vector>
+
+#include <xtl/xplatform.hpp>
+#include <xtl/xsequence.hpp>
+
+#include "xtensor/xadapt.hpp"
+#include "xtensor/xarray.hpp"
+#include "xtensor/xeval.hpp"
+#include "xtensor/xstrides.hpp"
+
+#include "xtensor_config.hpp"
+
+namespace xt
+{
+    using namespace std::string_literals;
+
+    namespace detail
+    {
+
+        const char magic_string[] = "\x93NUMPY";
+        const std::size_t magic_string_length = sizeof(magic_string) - 1;
+
+        template <class O>
+        inline void write_magic(O& ostream, unsigned char v_major = 1, unsigned char v_minor = 0)
+        {
+            ostream.write(magic_string, magic_string_length);
+            ostream.put(char(v_major));
+            ostream.put(char(v_minor));
+        }
+
+        inline void read_magic(std::istream& istream, unsigned char* v_major, unsigned char* v_minor)
+        {
+            std::unique_ptr<char[]> buf(new char[magic_string_length + 2]);
+            istream.read(buf.get(), magic_string_length + 2);
+
+            if (!istream)
+            {
+                XTENSOR_THROW(std::runtime_error, "io error: failed reading file");
+            }
+
+            for (std::size_t i = 0; i < magic_string_length; i++)
+            {
+                if (buf[i] != magic_string[i])
+                {
+                    XTENSOR_THROW(std::runtime_error, "this file do not have a valid npy format.");
+                }
+            }
+
+            *v_major = static_cast<unsigned char>(buf[magic_string_length]);
+            *v_minor = static_cast<unsigned char>(buf[magic_string_length + 1]);
+        }
+
+        template <class T>
+        inline char map_type()
+        {
+            if (std::is_same<T, float>::value)
+            {
+                return 'f';
+            }
+            if (std::is_same<T, double>::value)
+            {
+                return 'f';
+            }
+            if (std::is_same<T, long double>::value)
+            {
+                return 'f';
+            }
+
+            if (std::is_same<T, char>::value)
+            {
+                return 'i';
+            }
+            if (std::is_same<T, signed char>::value)
+            {
+                return 'i';
+            }
+            if (std::is_same<T, short>::value)
+            {
+                return 'i';
+            }
+            if (std::is_same<T, int>::value)
+            {
+                return 'i';
+            }
+            if (std::is_same<T, long>::value)
+            {
+                return 'i';
+            }
+            if (std::is_same<T, long long>::value)
+            {
+                return 'i';
+            }
+
+            if (std::is_same<T, unsigned char>::value)
+            {
+                return 'u';
+            }
+            if (std::is_same<T, unsigned short>::value)
+            {
+                return 'u';
+            }
+            if (std::is_same<T, unsigned int>::value)
+            {
+                return 'u';
+            }
+            if (std::is_same<T, unsigned long>::value)
+            {
+                return 'u';
+            }
+            if (std::is_same<T, unsigned long long>::value)
+            {
+                return 'u';
+            }
+
+            if (std::is_same<T, bool>::value)
+            {
+                return 'b';
+            }
+
+            if (std::is_same<T, std::complex<float>>::value)
+            {
+                return 'c';
+            }
+            if (std::is_same<T, std::complex<double>>::value)
+            {
+                return 'c';
+            }
+            if (std::is_same<T, std::complex<long double>>::value)
+            {
+                return 'c';
+            }
+
+            XTENSOR_THROW(std::runtime_error, "Type not known.");
+        }
+
        /**
         * Returns the npy byte-order character for T:
         * '<' little-endian, '>' big-endian, or '|' when byte order is not
         * applicable (single-byte types) or cannot be determined.
         */
        template <class T>
        inline char get_endianess()
        {
            constexpr char little_endian_char = '<';
            constexpr char big_endian_char = '>';
            constexpr char no_endian_char = '|';

            // Single-byte types have no byte order.
            if (sizeof(T) <= sizeof(char))
            {
                return no_endian_char;
            }

            switch (xtl::endianness())
            {
                case xtl::endian::little_endian:
                    return little_endian_char;
                case xtl::endian::big_endian:
                    return big_endian_char;
                default:
                    return no_endian_char;
            }
        }
+
+        template <class T>
+        inline std::string build_typestring()
+        {
+            std::stringstream ss;
+            ss << get_endianess<T>() << map_type<T>() << sizeof(T);
+            return ss.str();
+        }
+
+        // Safety check function
+        inline void parse_typestring(std::string typestring)
+        {
+            std::regex re("'([<>|])([ifucb])(\\d+)'");
+            std::smatch sm;
+
+            std::regex_match(typestring, sm, re);
+            if (sm.size() != 4)
+            {
+                XTENSOR_THROW(std::runtime_error, "invalid typestring");
+            }
+        }
+
+        // Helpers for the improvised parser
+        inline std::string unwrap_s(std::string s, char delim_front, char delim_back)
+        {
+            if ((s.back() == delim_back) && (s.front() == delim_front))
+            {
+                return s.substr(1, s.length() - 2);
+            }
+            else
+            {
+                XTENSOR_THROW(std::runtime_error, "unable to unwrap");
+            }
+        }
+
+        inline std::string get_value_from_map(std::string mapstr)
+        {
+            std::size_t sep_pos = mapstr.find_first_of(":");
+            if (sep_pos == std::string::npos)
+            {
+                return "";
+            }
+
+            return mapstr.substr(sep_pos + 1);
+        }
+
+        inline void pop_char(std::string& s, char c)
+        {
+            if (s.back() == c)
+            {
+                s.pop_back();
+            }
+        }
+
+        inline void
+        parse_header(std::string header, std::string& descr, bool* fortran_order, std::vector<std::size_t>& shape)
+        {
+            // The first 6 bytes are a magic string: exactly "\x93NUMPY".
+            //
+            // The next 1 byte is an unsigned byte: the major version number of the file
+            // format, e.g. \x01.
+            //
+            // The next 1 byte is an unsigned byte: the minor version number of the file
+            // format, e.g. \x00. Note: the version of the file format is not tied to the
+            // version of the NumPy package.
+            //
+            // The next 2 bytes form a little-endian unsigned short int: the length of the
+            // header data HEADER_LEN.
+            //
+            // The next HEADER_LEN bytes form the header data describing the array's
+            // format. It is an ASCII string which contains a Python literal expression of
+            // a dictionary. It is terminated by a newline ('\n') and padded with spaces
+            // ('\x20') to make the total length of the magic string + 4 + HEADER_LEN be
+            // evenly divisible by 16 for alignment purposes.
+            //
+            // The dictionary contains three keys:
+            //
+            // "descr" : dtype.descr
+            // An object that can be passed as an argument to the numpy.dtype()
+            // constructor to create the array's dtype.
+            // "fortran_order" : bool
+            // Whether the array data is Fortran-contiguous or not. Since
+            // Fortran-contiguous arrays are a common form of non-C-contiguity, we allow
+            // them to be written directly to disk for efficiency.
+            // "shape" : tuple of int
+            // The shape of the array.
+            // For repeatability and readability, this dictionary is formatted using
+            // pprint.pformat() so the keys are in alphabetic order.
+
+            // remove trailing newline
+            if (header.back() != '\n')
+            {
+                XTENSOR_THROW(std::runtime_error, "invalid header");
+            }
+            header.pop_back();
+
+            // remove all whitespaces
+            header.erase(std::remove(header.begin(), header.end(), ' '), header.end());
+
+            // unwrap dictionary
+            header = unwrap_s(header, '{', '}');
+
+            // find the positions of the 3 dictionary keys
+            std::size_t keypos_descr = header.find("'descr'");
+            std::size_t keypos_fortran = header.find("'fortran_order'");
+            std::size_t keypos_shape = header.find("'shape'");
+
+            // make sure all the keys are present
+            if (keypos_descr == std::string::npos)
+            {
+                XTENSOR_THROW(std::runtime_error, "missing 'descr' key");
+            }
+            if (keypos_fortran == std::string::npos)
+            {
+                XTENSOR_THROW(std::runtime_error, "missing 'fortran_order' key");
+            }
+            if (keypos_shape == std::string::npos)
+            {
+                XTENSOR_THROW(std::runtime_error, "missing 'shape' key");
+            }
+
+            // Make sure the keys are in order.
+            // Note that this violates the standard, which states that readers *must* not
+            // depend on the correct order here.
+            // TODO: fix
+            if (keypos_descr >= keypos_fortran || keypos_fortran >= keypos_shape)
+            {
+                XTENSOR_THROW(std::runtime_error, "header keys in wrong order");
+            }
+
+            // get the 3 key-value pairs
+            std::string keyvalue_descr;
+            keyvalue_descr = header.substr(keypos_descr, keypos_fortran - keypos_descr);
+            pop_char(keyvalue_descr, ',');
+
+            std::string keyvalue_fortran;
+            keyvalue_fortran = header.substr(keypos_fortran, keypos_shape - keypos_fortran);
+            pop_char(keyvalue_fortran, ',');
+
+            std::string keyvalue_shape;
+            keyvalue_shape = header.substr(keypos_shape, std::string::npos);
+            pop_char(keyvalue_shape, ',');
+
+            // get the values (right side of `:')
+            std::string descr_s = get_value_from_map(keyvalue_descr);
+            std::string fortran_s = get_value_from_map(keyvalue_fortran);
+            std::string shape_s = get_value_from_map(keyvalue_shape);
+
+            parse_typestring(descr_s);
+            descr = unwrap_s(descr_s, '\'', '\'');
+
+            // convert literal Python bool to C++ bool
+            if (fortran_s == "True")
+            {
+                *fortran_order = true;
+            }
+            else if (fortran_s == "False")
+            {
+                *fortran_order = false;
+            }
+            else
+            {
+                XTENSOR_THROW(std::runtime_error, "invalid fortran_order value");
+            }
+
+            // parse the shape Python tuple ( x, y, z,)
+
+            // first clear the vector
+            shape.clear();
+            shape_s = unwrap_s(shape_s, '(', ')');
+
+            // a tokenizer would be nice...
+            std::size_t pos = 0;
+            for (;;)
+            {
+                std::size_t pos_next = shape_s.find_first_of(',', pos);
+                std::string dim_s;
+
+                if (pos_next != std::string::npos)
+                {
+                    dim_s = shape_s.substr(pos, pos_next - pos);
+                }
+                else
+                {
+                    dim_s = shape_s.substr(pos);
+                }
+
+                if (dim_s.length() == 0)
+                {
+                    if (pos_next != std::string::npos)
+                    {
+                        XTENSOR_THROW(std::runtime_error, "invalid shape");
+                    }
+                }
+                else
+                {
+                    std::stringstream ss;
+                    ss << dim_s;
+                    std::size_t tmp;
+                    ss >> tmp;
+                    shape.push_back(tmp);
+                }
+
+                if (pos_next != std::string::npos)
+                {
+                    pos = ++pos_next;
+                }
+                else
+                {
+                    break;
+                }
+            }
+        }
+
        // Write a complete npy header to ``out``: magic string, format
        // version, little-endian header-length field and the Python dict
        // literal describing descr / fortran_order / shape, space-padded.
        //
        // @param out           output stream (must support write() and operator<<)
        // @param descr         dtype typestring, e.g. "<f8"
        // @param fortran_order whether the payload will be written column-major
        // @param shape         sequence holding the array shape
        template <class O, class S>
        inline void write_header(O& out, const std::string& descr, bool fortran_order, const S& shape)
        {
            std::ostringstream ss_header;
            std::string s_fortran_order;
            if (fortran_order)
            {
                s_fortran_order = "True";
            }
            else
            {
                s_fortran_order = "False";
            }

            // Render the shape as a Python tuple literal: "(a, b)" for
            // rank >= 2, "(a,)" for rank 1, "()" for rank 0 — the erase
            // calls below trim the trailing ", " accordingly.
            std::string s_shape;
            std::ostringstream ss_shape;
            ss_shape << "(";
            for (auto shape_it = std::begin(shape); shape_it != std::end(shape); ++shape_it)
            {
                ss_shape << *shape_it << ", ";
            }
            s_shape = ss_shape.str();
            if (xtl::sequence_size(shape) > 1)
            {
                s_shape = s_shape.erase(s_shape.size() - 2);
            }
            else if (xtl::sequence_size(shape) == 1)
            {
                s_shape = s_shape.erase(s_shape.size() - 1);
            }
            s_shape += ")";

            ss_header << "{'descr': '" << descr << "', 'fortran_order': " << s_fortran_order
                      << ", 'shape': " << s_shape << ", }";

            // +1 accounts for the terminating newline appended below.
            std::size_t header_len_pre = ss_header.str().length() + 1;
            std::size_t metadata_len = magic_string_length + 2 + 2 + header_len_pre;

            // Fall back to format 2.0 (4-byte length field) when the header
            // does not fit the 16-bit length field of format 1.0.
            unsigned char version[2] = {1, 0};
            if (metadata_len >= 255 * 255)
            {
                metadata_len = magic_string_length + 2 + 4 + header_len_pre;
                version[0] = 2;
                version[1] = 0;
            }
            // Pad with spaces so the total metadata size is 64-byte aligned.
            std::size_t padding_len = 64 - (metadata_len % 64);
            std::string padding(padding_len, ' ');
            ss_header << padding;
            ss_header << std::endl;

            std::string header = ss_header.str();

            // write magic
            write_magic(out, version[0], version[1]);

            // write header length (little endian: 2 bytes for 1.0, 4 for 2.0)
            if (version[0] == 1 && version[1] == 0)
            {
                char header_len_le16[2];
                uint16_t header_len = uint16_t(header.length());

                header_len_le16[0] = char((header_len >> 0) & 0xff);
                header_len_le16[1] = char((header_len >> 8) & 0xff);
                out.write(reinterpret_cast<char*>(header_len_le16), 2);
            }
            else
            {
                char header_len_le32[4];
                uint32_t header_len = uint32_t(header.length());

                header_len_le32[0] = char((header_len >> 0) & 0xff);
                header_len_le32[1] = char((header_len >> 8) & 0xff);
                header_len_le32[2] = char((header_len >> 16) & 0xff);
                header_len_le32[3] = char((header_len >> 24) & 0xff);
                out.write(reinterpret_cast<char*>(header_len_le32), 4);
            }

            out << header;
        }
+
+        inline std::string read_header_1_0(std::istream& istream)
+        {
+            // read header length and convert from little endian
+            char header_len_le16[2];
+            istream.read(header_len_le16, 2);
+
+            uint16_t header_length = uint16_t(header_len_le16[0] << 0) | uint16_t(header_len_le16[1] << 8);
+
+            if ((magic_string_length + 2 + 2 + header_length) % 16 != 0)
+            {
+                // TODO: display warning
+            }
+
+            std::unique_ptr<char[]> buf(new char[header_length]);
+            istream.read(buf.get(), header_length);
+            std::string header(buf.get(), header_length);
+
+            return header;
+        }
+
+        inline std::string read_header_2_0(std::istream& istream)
+        {
+            // read header length and convert from little endian
+            char header_len_le32[4];
+            istream.read(header_len_le32, 4);
+
+            uint32_t header_length = uint32_t(header_len_le32[0] << 0) | uint32_t(header_len_le32[1] << 8)
+                                     | uint32_t(header_len_le32[2] << 16) | uint32_t(header_len_le32[3] << 24);
+
+            if ((magic_string_length + 2 + 4 + header_length) % 16 != 0)
+            {
+                // TODO: display warning
+            }
+
+            std::unique_ptr<char[]> buf(new char[header_length]);
+            istream.read(buf.get(), header_length);
+            std::string header(buf.get(), header_length);
+
+            return header;
+        }
+
+        struct npy_file
+        {
+            npy_file() = default;
+
+            npy_file(std::vector<std::size_t>& shape, bool fortran_order, std::string typestring)
+                : m_shape(shape)
+                , m_fortran_order(fortran_order)
+                , m_typestring(typestring)
+            {
+                // Allocate memory
+                m_word_size = std::size_t(atoi(&typestring[2]));
+                m_n_bytes = compute_size(shape) * m_word_size;
+                m_buffer = std::allocator<char>{}.allocate(m_n_bytes);
+            }
+
+            ~npy_file()
+            {
+                if (m_buffer != nullptr)
+                {
+                    std::allocator<char>{}.deallocate(m_buffer, m_n_bytes);
+                }
+            }
+
+            // delete copy constructor
+            npy_file(const npy_file&) = delete;
+            npy_file& operator=(const npy_file&) = delete;
+
+            // implement move constructor and assignment
+            npy_file(npy_file&& rhs)
+                : m_shape(std::move(rhs.m_shape))
+                , m_fortran_order(std::move(rhs.m_fortran_order))
+                , m_word_size(std::move(rhs.m_word_size))
+                , m_n_bytes(std::move(rhs.m_n_bytes))
+                , m_typestring(std::move(rhs.m_typestring))
+                , m_buffer(rhs.m_buffer)
+            {
+                rhs.m_buffer = nullptr;
+            }
+
+            npy_file& operator=(npy_file&& rhs)
+            {
+                if (this != &rhs)
+                {
+                    m_shape = std::move(rhs.m_shape);
+                    m_fortran_order = std::move(rhs.m_fortran_order);
+                    m_word_size = std::move(rhs.m_word_size);
+                    m_n_bytes = std::move(rhs.m_n_bytes);
+                    m_typestring = std::move(rhs.m_typestring);
+                    m_buffer = rhs.m_buffer;
+                    rhs.m_buffer = nullptr;
+                }
+                return *this;
+            }
+
+            template <class T, layout_type L>
+            auto cast_impl(bool check_type)
+            {
+                if (m_buffer == nullptr)
+                {
+                    XTENSOR_THROW(std::runtime_error, "This npy_file has already been cast.");
+                }
+                T* ptr = reinterpret_cast<T*>(&m_buffer[0]);
+                std::vector<std::size_t> strides(m_shape.size());
+                std::size_t sz = compute_size(m_shape);
+
+                // check if the typestring matches the given one
+                if (check_type && m_typestring != detail::build_typestring<T>())
+                {
+                    XTENSOR_THROW(
+                        std::runtime_error,
+                        "Cast error: formats not matching "s + m_typestring + " vs "s
+                            + detail::build_typestring<T>()
+                    );
+                }
+
+                if ((L == layout_type::column_major && !m_fortran_order)
+                    || (L == layout_type::row_major && m_fortran_order))
+                {
+                    XTENSOR_THROW(
+                        std::runtime_error,
+                        "Cast error: layout mismatch between npy file and requested layout."
+                    );
+                }
+
+                compute_strides(
+                    m_shape,
+                    m_fortran_order ? layout_type::column_major : layout_type::row_major,
+                    strides
+                );
+                std::vector<std::size_t> shape(m_shape);
+
+                return std::make_tuple(ptr, sz, std::move(shape), std::move(strides));
+            }
+
+            template <class T, layout_type L = layout_type::dynamic>
+            auto cast(bool check_type = true) &&
+            {
+                auto cast_elems = cast_impl<T, L>(check_type);
+                m_buffer = nullptr;
+                return adapt(
+                    std::move(std::get<0>(cast_elems)),
+                    std::get<1>(cast_elems),
+                    acquire_ownership(),
+                    std::get<2>(cast_elems),
+                    std::get<3>(cast_elems)
+                );
+            }
+
+            template <class T, layout_type L = layout_type::dynamic>
+            auto cast(bool check_type = true) const&
+            {
+                auto cast_elems = cast_impl<T, L>(check_type);
+                return adapt(
+                    std::get<0>(cast_elems),
+                    std::get<1>(cast_elems),
+                    no_ownership(),
+                    std::get<2>(cast_elems),
+                    std::get<3>(cast_elems)
+                );
+            }
+
+            template <class T, layout_type L = layout_type::dynamic>
+            auto cast(bool check_type = true) &
+            {
+                auto cast_elems = cast_impl<T, L>(check_type);
+                return adapt(
+                    std::get<0>(cast_elems),
+                    std::get<1>(cast_elems),
+                    no_ownership(),
+                    std::get<2>(cast_elems),
+                    std::get<3>(cast_elems)
+                );
+            }
+
+            char* ptr()
+            {
+                return m_buffer;
+            }
+
+            std::size_t n_bytes()
+            {
+                return m_n_bytes;
+            }
+
+            std::vector<std::size_t> m_shape;
+            bool m_fortran_order;
+            std::size_t m_word_size;
+            std::size_t m_n_bytes;
+            std::string m_typestring;
+            char* m_buffer;
+        };
+
        // Read a complete npy file (magic, header, payload) from ``stream``
        // and return it as an owning npy_file object.
        //
        // @throws std::runtime_error on an unsupported format version (and,
        //         via the helpers, on bad magic or a malformed header)
        inline npy_file load_npy_file(std::istream& stream)
        {
            // check magic bytes and version number
            unsigned char v_major, v_minor;
            detail::read_magic(stream, &v_major, &v_minor);

            std::string header;

            // Versions 1.0 and 2.0 differ only in the width of the
            // header-length field (2 vs 4 bytes).
            if (v_major == 1 && v_minor == 0)
            {
                header = detail::read_header_1_0(stream);
            }
            else if (v_major == 2 && v_minor == 0)
            {
                header = detail::read_header_2_0(stream);
            }
            else
            {
                XTENSOR_THROW(std::runtime_error, "unsupported file format version");
            }

            // parse header
            bool fortran_order;
            std::string typestr;

            std::vector<std::size_t> shape;
            detail::parse_header(header, typestr, &fortran_order, shape);

            // npy_file allocates the payload buffer from the parsed metadata
            npy_file result(shape, fortran_order, typestr);
            // read the data
            stream.read(result.ptr(), std::streamsize((result.n_bytes())));
            return result;
        }
+
        // Serialize expression ``e`` to ``stream`` in npy format: header
        // first, then the raw bytes of the evaluated container.
        //
        // @param stream output stream supporting write()
        // @param e      the expression to dump
        template <class O, class E>
        inline void dump_npy_stream(O& stream, const xexpression<E>& e)
        {
            using value_type = typename E::value_type;
            const E& ex = e.derived_cast();
            // Evaluate lazy expressions so data() below refers to a real
            // contiguous buffer.
            auto&& eval_ex = eval(ex);
            // 1-D data is always written as C order; the fortran flag only
            // matters for rank >= 2.
            bool fortran_order = false;
            if (eval_ex.layout() == layout_type::column_major && eval_ex.dimension() > 1)
            {
                fortran_order = true;
            }

            std::string typestring = detail::build_typestring<value_type>();

            auto shape = eval_ex.shape();
            detail::write_header(stream, typestring, fortran_order, shape);

            std::size_t size = compute_size(shape);
            stream.write(
                reinterpret_cast<const char*>(eval_ex.data()),
                std::streamsize((sizeof(value_type) * size))
            );
        }
+    }  // namespace detail
+
+    /**
+     * Save xexpression to NumPy npy format
+     *
+     * @param filename The filename or path to dump the data
+     * @param e the xexpression
+     */
+    template <typename E>
+    inline void dump_npy(const std::string& filename, const xexpression<E>& e)
+    {
+        std::ofstream stream(filename, std::ofstream::binary);
+        if (!stream)
+        {
+            XTENSOR_THROW(std::runtime_error, "IO Error: failed to open file: "s + filename);
+        }
+
+        detail::dump_npy_stream(stream, e);
+    }
+
+    /**
+     * Save xexpression to NumPy npy format in a string
+     *
+     * @param e the xexpression
+     */
+    template <typename E>
+    inline std::string dump_npy(const xexpression<E>& e)
+    {
+        std::stringstream stream;
+        detail::dump_npy_stream(stream, e);
+        return stream.str();
+    }
+
+    /**
+     * Loads a npy file (the NumPy storage format)
+     *
+     * @param stream An input stream from which to load the file
+     * @tparam T select the type of the npy file (note: currently there is
+     *           no dynamic casting if types do not match)
+     * @tparam L select layout_type::column_major if you stored data in
+     *           Fortran format
+     * @return xarray with contents from npy file
+     */
+    template <typename T, layout_type L = layout_type::dynamic>
+    inline auto load_npy(std::istream& stream)
+    {
+        detail::npy_file file = detail::load_npy_file(stream);
+        return std::move(file).cast<T, L>();
+    }
+
+    /**
+     * Loads a npy file (the NumPy storage format)
+     *
+     * @param filename The filename or path to the file
+     * @tparam T select the type of the npy file (note: currently there is
+     *           no dynamic casting if types do not match)
+     * @tparam L select layout_type::column_major if you stored data in
+     *           Fortran format
+     * @return xarray with contents from npy file
+     */
+    template <typename T, layout_type L = layout_type::dynamic>
+    inline auto load_npy(const std::string& filename)
+    {
+        std::ifstream stream(filename, std::ifstream::binary);
+        if (!stream)
+        {
+            XTENSOR_THROW(std::runtime_error, "io error: failed to open a file.");
+        }
+        return load_npy<T, L>(stream);
+    }
+
+}  // namespace xt
+
+#endif

+ 95 - 0
3rd/numpy/include/xtensor/xoffset_view.hpp

@@ -0,0 +1,95 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_OFFSET_VIEW_HPP
+#define XTENSOR_OFFSET_VIEW_HPP
+
+#include <xtl/xcomplex.hpp>
+
+#include "xtensor/xfunctor_view.hpp"
+
+namespace xt
+{
+    namespace detail
+    {
        // Functor for xfunctor_view/xfunctor_adaptor that forwards element
        // access through a fixed byte offset I into each stored value (via
        // xtl::forward_offset). In the SIMD paths, I == 0 selects the real
        // part and any other I the imaginary part — presumably M is the
        // underlying scalar of a std::complex expression and I is 0 or
        // sizeof(M); confirm against callers.
        template <class M, std::size_t I>
        struct offset_forwarder
        {
            using value_type = M;
            using reference = M&;
            using const_reference = const M&;
            using pointer = M*;
            using const_pointer = const M*;

            using proxy = xtl::xproxy_wrapper<M>;

            template <class value_type, class requested_type>
            using simd_return_type = xt_simd::simd_return_type<value_type, requested_type>;

            // Scalar access: forward to the component at byte offset I.
            template <class T>
            decltype(auto) operator()(T&& t) const
            {
                return xtl::forward_offset<M, I>(std::forward<T>(t));
            }

            // SIMD load: load the full batch and keep only the real (I == 0)
            // or imaginary (I != 0) lanes. Only enabled for float/double
            // with an offset that fits inside the value.
            template <
                class align,
                class requested_type,
                std::size_t N,
                class E,
                class MF = M,
                class = std::enable_if_t<
                    (std::is_same<MF, double>::value || std::is_same<MF, float>::value) && I <= sizeof(MF),
                    int>>
            auto proxy_simd_load(const E& expr, std::size_t n) const
            {
                // TODO refactor using shuffle only
                auto batch = expr.template load_simd<align, requested_type, N>(n);
                if (I == 0)
                {
                    return batch.real();
                }
                else
                {
                    return batch.imag();
                }
            }

            // SIMD store: read-modify-write of the batch, replacing only the
            // selected component before storing back.
            // NOTE(review): the intermediate load always uses requested_type
            // double, even when M is float — confirm this is intended.
            template <
                class align,
                class simd,
                class E,
                class MF = M,
                class = std::enable_if_t<
                    (std::is_same<MF, double>::value || std::is_same<MF, float>::value) && I <= sizeof(MF),
                    int>>
            auto proxy_simd_store(E& expr, std::size_t n, const simd& batch) const
            {
                auto x = expr.template load_simd<align, double, simd::size>(n);
                if (I == 0)
                {
                    x.real() = batch;
                }
                else
                {
                    x.imag() = batch;
                }
                expr.template store_simd<align>(n, x);
            }
        };
+    }
+
+    template <class CT, class M, std::size_t I>
+    using xoffset_view = xfunctor_view<detail::offset_forwarder<M, I>, CT>;
+
+    template <class CT, class M, std::size_t I>
+    using xoffset_adaptor = xfunctor_adaptor<detail::offset_forwarder<M, I>, CT>;
+}
+
+#endif

+ 997 - 0
3rd/numpy/include/xtensor/xoperation.hpp

@@ -0,0 +1,997 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_OPERATION_HPP
+#define XTENSOR_OPERATION_HPP
+
+#include <algorithm>
+#include <functional>
+#include <type_traits>
+
+#include <xtl/xsequence.hpp>
+
+#include "xfunction.hpp"
+#include "xscalar.hpp"
+#include "xstrided_view.hpp"
+#include "xstrides.hpp"
+
+namespace xt
+{
+
+    /***********
+     * helpers *
+     ***********/
+
// Generates a functor NAME whose scalar operator() and SIMD simd_apply both
// apply the unary operator OP.
#define UNARY_OPERATOR_FUNCTOR(NAME, OP)               \
    struct NAME                                        \
    {                                                  \
        template <class A1>                            \
        constexpr auto operator()(const A1& arg) const \
        {                                              \
            return OP arg;                             \
        }                                              \
        template <class B>                             \
        constexpr auto simd_apply(const B& arg) const  \
        {                                              \
            return OP arg;                             \
        }                                              \
    }

// Generates the three mixed-type overloads of binary operator OP for
// std::complex operands (complex/complex, scalar/complex, complex/scalar),
// promoting both sides to their common type before applying OP.
#define DEFINE_COMPLEX_OVERLOAD(OP)                                                           \
    template <class T1, class T2, XTL_REQUIRES(xtl::negation<std::is_same<T1, T2>>)>          \
    constexpr auto operator OP(const std::complex<T1>& arg1, const std::complex<T2>& arg2)    \
    {                                                                                         \
        using result_type = typename xtl::promote_type_t<std::complex<T1>, std::complex<T2>>; \
        return (result_type(arg1) OP result_type(arg2));                                      \
    }                                                                                         \
                                                                                              \
    template <class T1, class T2, XTL_REQUIRES(xtl::negation<std::is_same<T1, T2>>)>          \
    constexpr auto operator OP(const T1& arg1, const std::complex<T2>& arg2)                  \
    {                                                                                         \
        using result_type = typename xtl::promote_type_t<T1, std::complex<T2>>;               \
        return (result_type(arg1) OP result_type(arg2));                                      \
    }                                                                                         \
                                                                                              \
    template <class T1, class T2, XTL_REQUIRES(xtl::negation<std::is_same<T1, T2>>)>          \
    constexpr auto operator OP(const std::complex<T1>& arg1, const T2& arg2)                  \
    {                                                                                         \
        using result_type = typename xtl::promote_type_t<std::complex<T1>, T2>;               \
        return (result_type(arg1) OP result_type(arg2));                                      \
    }

// Generates a functor NAME whose scalar operator() perfect-forwards its two
// arguments to binary operator OP (picking up the complex overloads above
// via `using xt::detail::operator OP`) and whose simd_apply applies OP to
// two batches.
#define BINARY_OPERATOR_FUNCTOR(NAME, OP)                              \
    struct NAME                                                        \
    {                                                                  \
        template <class T1, class T2>                                  \
        constexpr auto operator()(T1&& arg1, T2&& arg2) const          \
        {                                                              \
            using xt::detail::operator OP;                             \
            return (std::forward<T1>(arg1) OP std::forward<T2>(arg2)); \
        }                                                              \
        template <class B>                                             \
        constexpr auto simd_apply(const B& arg1, const B& arg2) const  \
        {                                                              \
            return (arg1 OP arg2);                                     \
        }                                                              \
    }
+
+    namespace detail
+    {
        // Instantiate the complex promotion overloads for every binary
        // operator used by the functors below.
        DEFINE_COMPLEX_OVERLOAD(+);
        DEFINE_COMPLEX_OVERLOAD(-);
        DEFINE_COMPLEX_OVERLOAD(*);
        DEFINE_COMPLEX_OVERLOAD(/);
        DEFINE_COMPLEX_OVERLOAD(%);
        DEFINE_COMPLEX_OVERLOAD(||);
        DEFINE_COMPLEX_OVERLOAD(&&);
        DEFINE_COMPLEX_OVERLOAD(|);
        DEFINE_COMPLEX_OVERLOAD(&);
        DEFINE_COMPLEX_OVERLOAD(^);
        DEFINE_COMPLEX_OVERLOAD(<<);
        DEFINE_COMPLEX_OVERLOAD(>>);
        DEFINE_COMPLEX_OVERLOAD(<);
        DEFINE_COMPLEX_OVERLOAD(<=);
        DEFINE_COMPLEX_OVERLOAD(>);
        DEFINE_COMPLEX_OVERLOAD(>=);
        DEFINE_COMPLEX_OVERLOAD(==);
        DEFINE_COMPLEX_OVERLOAD(!=);

        // Element-wise functors wrapping the built-in operators; these are
        // the F template arguments of the xfunction expressions built by
        // the operator overloads further down in this header.
        UNARY_OPERATOR_FUNCTOR(identity, +);
        UNARY_OPERATOR_FUNCTOR(negate, -);
        BINARY_OPERATOR_FUNCTOR(plus, +);
        BINARY_OPERATOR_FUNCTOR(minus, -);
        BINARY_OPERATOR_FUNCTOR(multiplies, *);
        BINARY_OPERATOR_FUNCTOR(divides, /);
        BINARY_OPERATOR_FUNCTOR(modulus, %);
        BINARY_OPERATOR_FUNCTOR(logical_or, ||);
        BINARY_OPERATOR_FUNCTOR(logical_and, &&);
        UNARY_OPERATOR_FUNCTOR(logical_not, !);
        BINARY_OPERATOR_FUNCTOR(bitwise_or, |);
        BINARY_OPERATOR_FUNCTOR(bitwise_and, &);
        BINARY_OPERATOR_FUNCTOR(bitwise_xor, ^);
        UNARY_OPERATOR_FUNCTOR(bitwise_not, ~);
        BINARY_OPERATOR_FUNCTOR(left_shift, <<);
        BINARY_OPERATOR_FUNCTOR(right_shift, >>);
        BINARY_OPERATOR_FUNCTOR(less, <);
        BINARY_OPERATOR_FUNCTOR(less_equal, <=);
        BINARY_OPERATOR_FUNCTOR(greater, >);
        BINARY_OPERATOR_FUNCTOR(greater_equal, >=);
        BINARY_OPERATOR_FUNCTOR(equal_to, ==);
        BINARY_OPERATOR_FUNCTOR(not_equal_to, !=);
+
        // Functor implementing the element-wise ternary selection
        // cond ? v1 : v2, with a scalar path (xtl::select) and a SIMD path
        // (xt_simd::select).
        struct conditional_ternary
        {
            // Boolean batch type matching the value batch B, used as the
            // SIMD condition operand.
            template <class B>
            using get_batch_bool = typename xt_simd::simd_traits<typename xt_simd::revert_simd_traits<B>::type>::bool_type;

            template <class B, class A1, class A2>
            constexpr auto operator()(const B& cond, const A1& v1, const A2& v2) const noexcept
            {
                return xtl::select(cond, v1, v2);
            }

            template <class B>
            constexpr B simd_apply(const get_batch_bool<B>& t1, const B& t2, const B& t3) const noexcept
            {
                return xt_simd::select(t1, t2, t3);
            }
        };
+
        // Wrapper holding the element-wise conversion functor to target
        // type R.
        template <class R>
        struct cast
        {
            // Functor applying static_cast<R> to a single element.
            struct functor
            {
                using result_type = R;

                template <class A1>
                constexpr result_type operator()(const A1& arg) const
                {
                    return static_cast<R>(arg);
                }

                // SIMD conversion disabled for now since it does not make sense
                // in most of the cases
                /*constexpr simd_result_type simd_apply(const simd_value_type& arg) const
                {
                    return static_cast<R>(arg);
                }*/
            };
        };
+
        // Meta-function mapping an expression tag to the concrete function
        // expression type; both the tensor and the optional tags currently
        // resolve to xfunction<F, E...>.
        template <class Tag, class F, class... E>
        struct select_xfunction_expression;

        template <class F, class... E>
        struct select_xfunction_expression<xtensor_expression_tag, F, E...>
        {
            using type = xfunction<F, E...>;
        };

        template <class F, class... E>
        struct select_xfunction_expression<xoptional_expression_tag, F, E...>
        {
            using type = xfunction<F, E...>;
        };

        template <class Tag, class F, class... E>
        using select_xfunction_expression_t = typename select_xfunction_expression<Tag, F, E...>::type;
+
        // Computes the xfunction type obtained by applying functor F to the
        // expressions E..., each stored through its const closure type.
        template <class F, class... E>
        struct xfunction_type
        {
            using expression_tag = xexpression_tag_t<E...>;
            using functor_type = F;
            using type = select_xfunction_expression_t<expression_tag, functor_type, const_xclosure_t<E>...>;
        };

        // Builds the lazy xfunction expression F(e...) by value-initializing
        // the functor and forwarding the operand expressions.
        template <class F, class... E>
        inline auto make_xfunction(E&&... e) noexcept
        {
            using function_type = xfunction_type<F, E...>;
            using functor_type = typename function_type::functor_type;
            using type = typename function_type::type;
            return type(functor_type(), std::forward<E>(e)...);
        }

        // On MSVC, the second argument of enable_if_t is always evaluated, even if the condition is false.
        // Wrapping the xfunction type in the xfunction_type metafunction avoids this evaluation when
        // the condition is false, since it leads to a tricky bug preventing from using operator+ and
        // operator- on vector and arrays iterators.
        template <class F, class... E>
        using xfunction_type_t = typename std::
            enable_if_t<has_xexpression<std::decay_t<E>...>::value, xfunction_type<F, E...>>::type;
+    }
+
+#undef UNARY_OPERATOR_FUNCTOR
+#undef BINARY_OPERATOR_FUNCTOR
+
+    /*************
+     * operators *
+     *************/
+
+    /**
+     * @defgroup arithmetic_operators Arithmetic operators
+     */
+
+    /**
+     * @ingroup arithmetic_operators
+     * @brief Identity
+     *
+     * Returns an \ref xfunction for the element-wise identity
+     * of \a e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto operator+(E&& e) noexcept -> detail::xfunction_type_t<detail::identity, E>
+    {
+        return detail::make_xfunction<detail::identity>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup arithmetic_operators
+     * @brief Opposite
+     *
+     * Returns an \ref xfunction for the element-wise opposite
+     * of \a e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto operator-(E&& e) noexcept -> detail::xfunction_type_t<detail::negate, E>
+    {
+        return detail::make_xfunction<detail::negate>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup arithmetic_operators
+     * @brief Addition
+     *
+     * Returns an \ref xfunction for the element-wise addition
+     * of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator+(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::plus, E1, E2>
+    {
+        return detail::make_xfunction<detail::plus>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup arithmetic_operators
+     * @brief Subtraction
+     *
+     * Returns an \ref xfunction for the element-wise subtraction
+     * of \a e2 from \a e1.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator-(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::minus, E1, E2>
+    {
+        return detail::make_xfunction<detail::minus>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup arithmetic_operators
+     * @brief Multiplication
+     *
+     * Returns an \ref xfunction for the element-wise multiplication
+     * of \a e1 by \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator*(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::multiplies, E1, E2>
+    {
+        return detail::make_xfunction<detail::multiplies>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup arithmetic_operators
+     * @brief Division
+     *
+     * Returns an \ref xfunction for the element-wise division
+     * of \a e1 by \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator/(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::divides, E1, E2>
+    {
+        return detail::make_xfunction<detail::divides>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup arithmetic_operators
+     * @brief Modulus
+     *
+     * Returns an \ref xfunction for the element-wise modulus
+     * of \a e1 by \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator%(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::modulus, E1, E2>
+    {
+        return detail::make_xfunction<detail::modulus>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @defgroup logical_operators Logical operators
+     */
+
+    /**
+     * @ingroup logical_operators
+     * @brief Or
+     *
+     * Returns an \ref xfunction for the element-wise or
+     * of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator||(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::logical_or, E1, E2>
+    {
+        return detail::make_xfunction<detail::logical_or>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief And
+     *
+     * Returns an \ref xfunction for the element-wise and
+     * of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator&&(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::logical_and, E1, E2>
+    {
+        return detail::make_xfunction<detail::logical_and>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief Not
+     *
+     * Returns an \ref xfunction for the element-wise not
+     * of \a e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto operator!(E&& e) noexcept -> detail::xfunction_type_t<detail::logical_not, E>
+    {
+        return detail::make_xfunction<detail::logical_not>(std::forward<E>(e));
+    }
+
+    /**
+     * @defgroup bitwise_operators Bitwise operators
+     */
+
+    /**
+     * @ingroup bitwise_operators
+     * @brief Bitwise and
+     *
+     * Returns an \ref xfunction for the element-wise bitwise and
+     * of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator&(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::bitwise_and, E1, E2>
+    {
+        return detail::make_xfunction<detail::bitwise_and>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup bitwise_operators
+     * @brief Bitwise or
+     *
+     * Returns an \ref xfunction for the element-wise bitwise or
+     * of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator|(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::bitwise_or, E1, E2>
+    {
+        return detail::make_xfunction<detail::bitwise_or>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup bitwise_operators
+     * @brief Bitwise xor
+     *
+     * Returns an \ref xfunction for the element-wise bitwise xor
+     * of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator^(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::bitwise_xor, E1, E2>
+    {
+        return detail::make_xfunction<detail::bitwise_xor>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup bitwise_operators
+     * @brief Bitwise not
+     *
+     * Returns an \ref xfunction for the element-wise bitwise not
+     * of \a e.
+     * @param e an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E>
+    inline auto operator~(E&& e) noexcept -> detail::xfunction_type_t<detail::bitwise_not, E>
+    {
+        return detail::make_xfunction<detail::bitwise_not>(std::forward<E>(e));
+    }
+
+    /**
+     * @ingroup bitwise_operators
+     * @brief Bitwise left shift
+     *
+     * Returns an \ref xfunction for the element-wise bitwise left shift of e1
+     * by e2.
+     * @param e1 an \ref xexpression
+     * @param e2 an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto left_shift(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::left_shift, E1, E2>
+    {
+        return detail::make_xfunction<detail::left_shift>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup bitwise_operators
+     * @brief Bitwise right shift
+     *
+     * Returns an \ref xfunction for the element-wise bitwise right shift of e1
+     * by e2.
+     * @param e1 an \ref xexpression
+     * @param e2 an \ref xexpression
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto right_shift(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::right_shift, E1, E2>
+    {
+        return detail::make_xfunction<detail::right_shift>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    namespace detail
+    {
+        // Shift operator is not available for all the types, so the xfunction type instantiation
+        // has to be delayed, enable_if_t is not sufficient
+        template <class F, class E1, class E2>
+        struct shift_function_getter
+        {
+            using type = xfunction_type_t<F, E1, E2>;
+        };
+
+        template <bool B, class T>
+        struct eval_enable_if
+        {
+            using type = typename T::type;
+        };
+
+        template <class T>
+        struct eval_enable_if<false, T>
+        {
+        };
+
+        template <bool B, class T>
+        using eval_enable_if_t = typename eval_enable_if<B, T>::type;
+
+        template <class F, class E1, class E2>
+        using shift_return_type_t = eval_enable_if_t<
+            is_xexpression<std::decay_t<E1>>::value,
+            shift_function_getter<F, E1, E2>>;
+    }
+
+    /**
+     * @ingroup bitwise_operators
+     * @brief Bitwise left shift
+     *
+     * Returns an \ref xfunction for the element-wise bitwise left shift of e1
+     * by e2.
+     * @param e1 an \ref xexpression
+     * @param e2 an \ref xexpression
+     * @return an \ref xfunction
+     * @sa left_shift
+     */
+    template <class E1, class E2>
+    inline auto operator<<(E1&& e1, E2&& e2) noexcept
+        -> detail::shift_return_type_t<detail::left_shift, E1, E2>
+    {
+        return left_shift(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup bitwise_operators
+     * @brief Bitwise right shift
+     *
+     * Returns an \ref xfunction for the element-wise bitwise right shift of e1
+     * by e2.
+     * @param e1 an \ref xexpression
+     * @param e2 an \ref xexpression
+     * @return an \ref xfunction
+     * @sa right_shift
+     */
+    template <class E1, class E2>
+    inline auto operator>>(E1&& e1, E2&& e2) -> detail::shift_return_type_t<detail::right_shift, E1, E2>
+    {
+        return right_shift(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @defgroup comparison_operators Comparison operators
+     */
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Lesser than
+     *
+     * Returns an \ref xfunction for the element-wise
+     * lesser than comparison of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator<(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::less, E1, E2>
+    {
+        return detail::make_xfunction<detail::less>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Lesser or equal
+     *
+     * Returns an \ref xfunction for the element-wise
+     * lesser or equal comparison of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator<=(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::less_equal, E1, E2>
+    {
+        return detail::make_xfunction<detail::less_equal>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Greater than
+     *
+     * Returns an \ref xfunction for the element-wise
+     * greater than comparison of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator>(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::greater, E1, E2>
+    {
+        return detail::make_xfunction<detail::greater>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Greater or equal
+     *
+     * Returns an \ref xfunction for the element-wise
+     * greater or equal comparison of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto operator>=(E1&& e1, E2&& e2) noexcept
+        -> detail::xfunction_type_t<detail::greater_equal, E1, E2>
+    {
+        return detail::make_xfunction<detail::greater_equal>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Equality
+     *
+     * Returns true if \a e1 and \a e2 have the same shape
+     * and hold the same values. Unlike other comparison
+     * operators, this does not return an \ref xfunction.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return a boolean
+     */
+    template <class E1, class E2>
+    inline std::enable_if_t<xoptional_comparable<E1, E2>::value, bool>
+    operator==(const xexpression<E1>& e1, const xexpression<E2>& e2)
+    {
+        const E1& de1 = e1.derived_cast();
+        const E2& de2 = e2.derived_cast();
+        bool res = de1.dimension() == de2.dimension()
+                   && std::equal(de1.shape().begin(), de1.shape().end(), de2.shape().begin());
+        auto iter1 = de1.begin();
+        auto iter2 = de2.begin();
+        auto iter_end = de1.end();
+        while (res && iter1 != iter_end)
+        {
+            res = (*iter1++ == *iter2++);
+        }
+        return res;
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Inequality
+     *
+     * Returns true if \a e1 and \a e2 have different shapes
+     * or hold different values. Unlike other comparison
+     * operators, this does not return an \ref xfunction.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return a boolean
+     */
+    template <class E1, class E2>
+    inline bool operator!=(const xexpression<E1>& e1, const xexpression<E2>& e2)
+    {
+        return !(e1 == e2);
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Element-wise equality
+     *
+     * Returns an \ref xfunction for the element-wise
+     * equality of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto equal(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::equal_to, E1, E2>
+    {
+        return detail::make_xfunction<detail::equal_to>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Element-wise inequality
+     *
+     * Returns an \ref xfunction for the element-wise
+     * inequality of \a e1 and \a e2.
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto not_equal(E1&& e1, E2&& e2) noexcept -> detail::xfunction_type_t<detail::not_equal_to, E1, E2>
+    {
+        return detail::make_xfunction<detail::not_equal_to>(std::forward<E1>(e1), std::forward<E2>(e2));
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Lesser than
+     *
+     * Returns an \ref xfunction for the element-wise
+     * lesser than comparison of \a e1 and \a e2. This
+     * function is equivalent to operator<(E1&&, E2&&).
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto less(E1&& e1, E2&& e2) noexcept -> decltype(std::forward<E1>(e1) < std::forward<E2>(e2))
+    {
+        return std::forward<E1>(e1) < std::forward<E2>(e2);
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Lesser or equal
+     *
+     * Returns an \ref xfunction for the element-wise
+     * lesser or equal comparison of \a e1 and \a e2. This
+     * function is equivalent to operator<=(E1&&, E2&&).
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto less_equal(E1&& e1, E2&& e2) noexcept -> decltype(std::forward<E1>(e1) <= std::forward<E2>(e2))
+    {
+        return std::forward<E1>(e1) <= std::forward<E2>(e2);
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Greater than
+     *
+     * Returns an \ref xfunction for the element-wise
+     * greater than comparison of \a e1 and \a e2. This
+     * function is equivalent to operator>(E1&&, E2&&).
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto greater(E1&& e1, E2&& e2) noexcept -> decltype(std::forward<E1>(e1) > std::forward<E2>(e2))
+    {
+        return std::forward<E1>(e1) > std::forward<E2>(e2);
+    }
+
+    /**
+     * @ingroup comparison_operators
+     * @brief Greater or equal
+     *
+     * Returns an \ref xfunction for the element-wise
+     * greater or equal comparison of \a e1 and \a e2.
+     * This function is equivalent to operator>=(E1&&, E2&&).
+     * @param e1 an \ref xexpression or a scalar
+     * @param e2 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2>
+    inline auto greater_equal(E1&& e1, E2&& e2) noexcept
+        -> decltype(std::forward<E1>(e1) >= std::forward<E2>(e2))
+    {
+        return std::forward<E1>(e1) >= std::forward<E2>(e2);
+    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief Ternary selection
+     *
+     * Returns an \ref xfunction for the element-wise
+     * ternary selection (i.e. operator ? :) of \a e1,
+     * \a e2 and \a e3.
+     * @param e1 a boolean \ref xexpression
+     * @param e2 an \ref xexpression or a scalar
+     * @param e3 an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+    template <class E1, class E2, class E3>
+    inline auto where(E1&& e1, E2&& e2, E3&& e3) noexcept
+        -> detail::xfunction_type_t<detail::conditional_ternary, E1, E2, E3>
+    {
+        return detail::make_xfunction<detail::conditional_ternary>(
+            std::forward<E1>(e1),
+            std::forward<E2>(e2),
+            std::forward<E3>(e3)
+        );
+    }
+
+    namespace detail
+    {
+        template <layout_type L>
+        struct next_idx_impl;
+
+        template <>
+        struct next_idx_impl<layout_type::row_major>
+        {
+            template <class S, class I>
+            inline auto operator()(const S& shape, I& idx)
+            {
+                for (std::size_t j = shape.size(); j > 0; --j)
+                {
+                    std::size_t i = j - 1;
+                    if (idx[i] >= shape[i] - 1)
+                    {
+                        idx[i] = 0;
+                    }
+                    else
+                    {
+                        idx[i]++;
+                        return idx;
+                    }
+                }
+                // return empty index, happens at last iteration step, but remains unused
+                return I();
+            }
+        };
+
+        template <>
+        struct next_idx_impl<layout_type::column_major>
+        {
+            template <class S, class I>
+            inline auto operator()(const S& shape, I& idx)
+            {
+                for (std::size_t i = 0; i < shape.size(); ++i)
+                {
+                    if (idx[i] >= shape[i] - 1)
+                    {
+                        idx[i] = 0;
+                    }
+                    else
+                    {
+                        idx[i]++;
+                        return idx;
+                    }
+                }
+                // return empty index, happens at last iteration step, but remains unused
+                return I();
+            }
+        };
+
+        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class S, class I>
+        inline auto next_idx(const S& shape, I& idx)
+        {
+            next_idx_impl<L> nii;
+            return nii(shape, idx);
+        }
+    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief return vector of indices where T is not zero
+     *
+     * @param arr input array
+     * @return vector of vectors, one for each dimension of arr, containing
+     * the indices of the non-zero elements in that dimension
+     */
+    template <class T>
+    inline auto nonzero(const T& arr)
+    {
+        auto shape = arr.shape();
+        using index_type = xindex_type_t<typename T::shape_type>;
+        using size_type = typename T::size_type;
+
+        auto idx = xtl::make_sequence<index_type>(arr.dimension(), 0);
+        std::vector<std::vector<size_type>> indices(arr.dimension());
+
+        size_type total_size = compute_size(shape);
+        for (size_type i = 0; i < total_size; i++, detail::next_idx(shape, idx))
+        {
+            if (arr.element(std::begin(idx), std::end(idx)))
+            {
+                for (std::size_t n = 0; n < indices.size(); ++n)
+                {
+                    indices.at(n).push_back(idx[n]);
+                }
+            }
+        }
+
+        return indices;
+    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief return vector of indices where condition is true
+     *        (equivalent to \a nonzero(condition))
+     *
+     * @param condition input array
+     * @return vector of \a index_types where condition is not equal to zero
+     */
+    template <class T>
+    inline auto where(const T& condition)
+    {
+        return nonzero(condition);
+    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief return vector of indices where arr is not zero
+     *
+     * @tparam L the traversal order
+     * @param arr input array
+     * @return vector of index_types where arr is not equal to zero (use `xt::from_indices` to convert)
+     *
+     * @sa xt::from_indices
+     */
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class T>
+    inline auto argwhere(const T& arr)
+    {
+        auto shape = arr.shape();
+        using index_type = xindex_type_t<typename T::shape_type>;
+        using size_type = typename T::size_type;
+
+        auto idx = xtl::make_sequence<index_type>(arr.dimension(), 0);
+        std::vector<index_type> indices;
+
+        size_type total_size = compute_size(shape);
+        for (size_type i = 0; i < total_size; i++, detail::next_idx<L>(shape, idx))
+        {
+            if (arr.element(std::begin(idx), std::end(idx)))
+            {
+                indices.push_back(idx);
+            }
+        }
+
+        return indices;
+    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief Any
+     *
+     * Returns true if any of the values of \a e is truthy,
+     * false otherwise.
+     * @param e an \ref xexpression
+     * @return a boolean
+     */
+    template <class E>
+    inline bool any(E&& e)
+    {
+        using xtype = std::decay_t<E>;
+        using value_type = typename xtype::value_type;
+        return std::any_of(
+            e.cbegin(),
+            e.cend(),
+            [](const value_type& el)
+            {
+                return el;
+            }
+        );
+    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief All
+     *
+     * Returns true if all of the values of \a e are truthy,
+     * false otherwise.
+     * @param e an \ref xexpression
+     * @return a boolean
+     */
+    template <class E>
+    inline bool all(E&& e)
+    {
+        using xtype = std::decay_t<E>;
+        using value_type = typename xtype::value_type;
+        return std::all_of(
+            e.cbegin(),
+            e.cend(),
+            [](const value_type& el)
+            {
+                return el;
+            }
+        );
+    }
+
+    /**
+     * @defgroup casting_operators Casting operators
+     */
+
+    /**
+     * @ingroup casting_operators
+     * @brief Element-wise ``static_cast``.
+     *
+     * Returns an \ref xfunction for the element-wise
+     * static_cast of \a e to type R.
+     *
+     * @param e an \ref xexpression or a scalar
+     * @return an \ref xfunction
+     */
+
+    template <class R, class E>
+    inline auto cast(E&& e) noexcept -> detail::xfunction_type_t<typename detail::cast<R>::functor, E>
+    {
+        return detail::make_xfunction<typename detail::cast<R>::functor>(std::forward<E>(e));
+    }
+
+}
+
+#endif

+ 323 - 0
3rd/numpy/include/xtensor/xpad.hpp

@@ -0,0 +1,323 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_PAD_HPP
+#define XTENSOR_PAD_HPP
+
+#include "xarray.hpp"
+#include "xstrided_view.hpp"
+#include "xtensor.hpp"
+#include "xview.hpp"
+
+using namespace xt::placeholders;  // to enable _ syntax
+
+namespace xt
+{
    /**
     * @brief Defines different algorithms to be used in ``xt::pad``:
     * - ``constant``: Pads with a constant value.
     * - ``symmetric``: Pads with the reflection of the vector mirrored along the edge of the array.
     * - ``reflect``: Pads with the reflection of the vector mirrored on the first and last values
     *   of the vector along each axis.
     * - ``wrap``: Pads with the wrap of the vector along the axis. The first values are used to pad
     *   the end and the end values are used to pad the beginning.
     * - ``periodic`` : ``== wrap`` (pads with periodic repetitions of the vector).
     * - ``edge``: Pads with the edge values of the array (repeats the first/last value of each axis).
     *
     * OpenCV to xtensor:
     * - ``BORDER_CONSTANT == constant``
     * - ``BORDER_REFLECT == symmetric``
     * - ``BORDER_REFLECT_101 == reflect``
     * - ``BORDER_WRAP == wrap``
     * - ``BORDER_REPLICATE == edge``
     */
    enum class pad_mode
    {
        constant,
        symmetric,
        reflect,
        wrap,
        periodic,
        edge
    };
+
+    namespace detail
+    {
+        template <class S, class T>
+        inline bool check_pad_width(const std::vector<std::vector<S>>& pad_width, const T& shape)
+        {
+            if (pad_width.size() != shape.size())
+            {
+                return false;
+            }
+
+            return true;
+        }
+    }
+
    /**
     * @brief Pad an array.
     *
     * @param e The array.
     * @param pad_width Number of values padded to the edges of each axis:
     * `{{before_1, after_1}, ..., {before_N, after_N}}`.
     * @param mode The type of algorithm to use. [default: `xt::pad_mode::constant`].
     * @param constant_value The value to set the padded values for each axis
     * (used in `xt::pad_mode::constant`).
     * @return The padded array.
     */
    template <class E, class S = typename std::decay_t<E>::size_type, class V = typename std::decay_t<E>::value_type>
    inline auto
    pad(E&& e,
        const std::vector<std::vector<S>>& pad_width,
        pad_mode mode = pad_mode::constant,
        V constant_value = 0)
    {
        XTENSOR_ASSERT(detail::check_pad_width(pad_width, e.shape()));

        using size_type = typename std::decay_t<E>::size_type;
        using return_type = temporary_type_t<E>;

        // place the original array in the center

        // new_shape[axis] = before + original + after; sv selects the centered
        // region of the output that receives a copy of the input.
        auto new_shape = e.shape();
        xt::xstrided_slice_vector sv;
        sv.reserve(e.shape().size());
        for (size_type axis = 0; axis < e.shape().size(); ++axis)
        {
            // nb/ne: number of padded values before/after the data on this axis
            size_type nb = static_cast<size_type>(pad_width[axis][0]);
            size_type ne = static_cast<size_type>(pad_width[axis][1]);
            size_type ns = nb + e.shape(axis) + ne;
            new_shape[axis] = ns;
            sv.push_back(xt::range(nb, nb + e.shape(axis)));
        }

        if (mode == pad_mode::constant)
        {
            // constant mode: allocate the output pre-filled with the fill
            // value, then overwrite the center with the input. Done.
            return_type out(new_shape, constant_value);
            xt::strided_view(out, sv) = e;
            return out;
        }

        return_type out(new_shape);
        xt::strided_view(out, sv) = e;

        // construct padded regions based on original image

        // svs: source slice (read), svt: target slice (written); both start as
        // "take everything" and only the current axis is narrowed per step.
        xt::xstrided_slice_vector svs(e.shape().size(), xt::all());
        xt::xstrided_slice_vector svt(e.shape().size(), xt::all());

        for (size_type axis = 0; axis < e.shape().size(); ++axis)
        {
            size_type nb = static_cast<size_type>(pad_width[axis][0]);
            size_type ne = static_cast<size_type>(pad_width[axis][1]);

            // fill the region before the data on this axis
            if (nb > static_cast<size_type>(0))
            {
                svt[axis] = xt::range(0, nb);

                if (mode == pad_mode::wrap || mode == pad_mode::periodic)
                {
                    // copy the first nb values of the centered data
                    // (they sit at [e.shape(axis), nb + e.shape(axis)) in out)
                    XTENSOR_ASSERT(nb <= e.shape(axis));
                    svs[axis] = xt::range(e.shape(axis), nb + e.shape(axis));
                    xt::strided_view(out, svt) = xt::strided_view(out, svs);
                }
                else if (mode == pad_mode::symmetric)
                {
                    // mirror including the edge value (reversed range)
                    XTENSOR_ASSERT(nb <= e.shape(axis));
                    svs[axis] = xt::range(2 * nb - 1, nb - 1, -1);
                    xt::strided_view(out, svt) = xt::strided_view(out, svs);
                }
                else if (mode == pad_mode::reflect)
                {
                    // mirror excluding the edge value (reversed range)
                    XTENSOR_ASSERT(nb <= e.shape(axis) - 1);
                    svs[axis] = xt::range(2 * nb, nb, -1);
                    xt::strided_view(out, svt) = xt::strided_view(out, svs);
                }
                else if (mode == pad_mode::edge)
                {
                    // broadcast the single edge value over the padded region
                    svs[axis] = xt::range(nb, nb + 1);
                    xt::strided_view(out, svt) = xt::broadcast(
                        xt::strided_view(out, svs),
                        xt::strided_view(out, svt).shape()
                    );
                }
            }

            // fill the region after the data on this axis
            if (ne > static_cast<size_type>(0))
            {
                svt[axis] = xt::range(out.shape(axis) - ne, out.shape(axis))
;
                if (mode == pad_mode::wrap || mode == pad_mode::periodic)
                {
                    XTENSOR_ASSERT(ne <= e.shape(axis));
                    svs[axis] = xt::range(nb, nb + ne);
                    xt::strided_view(out, svt) = xt::strided_view(out, svs);
                }
                else if (mode == pad_mode::symmetric)
                {
                    XTENSOR_ASSERT(ne <= e.shape(axis));
                    // ne == nb + e.shape(axis) would make the stop index
                    // negative; use the run-to-the-beginning form instead
                    if (ne == nb + e.shape(axis))
                    {
                        svs[axis] = xt::range(nb + e.shape(axis) - 1, _, -1);
                    }
                    else
                    {
                        svs[axis] = xt::range(nb + e.shape(axis) - 1, nb + e.shape(axis) - ne - 1, -1);
                    }
                    xt::strided_view(out, svt) = xt::strided_view(out, svs);
                }
                else if (mode == pad_mode::reflect)
                {
                    XTENSOR_ASSERT(ne <= e.shape(axis) - 1);
                    if (ne == nb + e.shape(axis) - 1)
                    {
                        svs[axis] = xt::range(nb + e.shape(axis) - 2, _, -1);
                    }
                    else
                    {
                        svs[axis] = xt::range(nb + e.shape(axis) - 2, nb + e.shape(axis) - ne - 2, -1);
                    }
                    xt::strided_view(out, svt) = xt::strided_view(out, svs);
                }
                else if (mode == pad_mode::edge)
                {
                    svs[axis] = xt::range(out.shape(axis) - ne - 1, out.shape(axis) - ne);
                    xt::strided_view(out, svt) = xt::broadcast(
                        xt::strided_view(out, svs),
                        xt::strided_view(out, svt).shape()
                    );
                }
            }

            // restore "take everything" before moving to the next axis
            svs[axis] = xt::all();
            svt[axis] = xt::all();
        }

        return out;
    }
+
+    /**
+     * @brief Pad an array.
+     *
+     * @param e The array.
+     * @param pad_width Number of values padded to the edges of each axis:
+     * `{before, after}`.
+     * @param mode The type of algorithm to use. [default: `xt::pad_mode::constant`].
+     * @param constant_value The value to set the padded values for each axis
+     * (used in `xt::pad_mode::constant`).
+     * @return The padded array.
+     */
+    template <class E, class S = typename std::decay_t<E>::size_type, class V = typename std::decay_t<E>::value_type>
+    inline auto
+    pad(E&& e, const std::vector<S>& pad_width, pad_mode mode = pad_mode::constant, V constant_value = 0)
+    {
+        std::vector<std::vector<S>> pw(e.shape().size(), pad_width);
+
+        return pad(e, pw, mode, constant_value);
+    }
+
+    /**
+     * @brief Pad an array.
+     *
+     * @param e The array.
+     * @param pad_width Number of values padded to the edges of each axis.
+     * @param mode The type of algorithm to use. [default: `xt::pad_mode::constant`].
+     * @param constant_value The value to set the padded values for each axis
+     * (used in `xt::pad_mode::constant`).
+     * @return The padded array.
+     */
+    template <class E, class S = typename std::decay_t<E>::size_type, class V = typename std::decay_t<E>::value_type>
+    inline auto pad(E&& e, S pad_width, pad_mode mode = pad_mode::constant, V constant_value = 0)
+    {
+        std::vector<std::vector<S>> pw(e.shape().size(), {pad_width, pad_width});
+
+        return pad(e, pw, mode, constant_value);
+    }
+
    namespace detail
    {

        // Implementation of tile(): repeats @p e reps[axis] times along each
        // axis. The input is copied once into the "corner" of the enlarged
        // result, then that block is replicated axis by axis; axes already
        // processed are tiled as whole hyperslabs.
        template <class E, class S>
        inline auto tile(E&& e, const S& reps)
        {
            using size_type = typename std::decay_t<E>::size_type;

            using return_type = temporary_type_t<E>;

            XTENSOR_ASSERT(e.shape().size() == reps.size());

            using new_shape_type = typename return_type::shape_type;
            auto new_shape = xtl::make_sequence<new_shape_type>(e.shape().size());

            xt::xstrided_slice_vector sv(reps.size());

            // result shape is the input shape scaled by the repetition
            // counts; sv selects the corner region receiving the original
            for (size_type axis = 0; axis < reps.size(); ++axis)
            {
                new_shape[axis] = e.shape(axis) * reps[axis];
                sv[axis] = xt::range(0, e.shape(axis));
            }
            return_type out(new_shape);

            xt::strided_view(out, sv) = e;

            // svs: source slice (read), svt: target slice (written)
            xt::xstrided_slice_vector svs(e.shape().size(), xt::all());
            xt::xstrided_slice_vector svt(e.shape().size(), xt::all());

            for (size_type axis = 0; axis < e.shape().size(); ++axis)
            {
                // copy the first block to positions 1..reps[axis]-1 along
                // this axis, restoring the all() slices after each copy
                for (size_type i = 1; i < static_cast<size_type>(reps[axis]); ++i)
                {
                    svs[axis] = xt::range(0, e.shape(axis));
                    svt[axis] = xt::range(i * e.shape(axis), (i + 1) * e.shape(axis));
                    xt::strided_view(out, svt) = xt::strided_view(out, svs);
                    svs[axis] = xt::all();
                    svt[axis] = xt::all();
                }
            }

            return out;
        }
    }
+
+    /**
+     * @brief Tile an array.
+     *
+     * @param e The array.
+     * @param reps The number of repetitions of A along each axis.
+     * @return The tiled array.
+     */
+
+    template <class E, class S = typename std::decay_t<E>::size_type>
+    inline auto tile(E&& e, std::initializer_list<S> reps)
+    {
+        return detail::tile(std::forward<E>(e), std::vector<S>{reps});
+    }
+
    /**
     * @brief Tile an array.
     *
     * @param e The array.
     * @param reps The number of repetitions of A along each axis, given as
     * any non-integral container of counts (e.g. ``std::vector``).
     * @return The tiled array.
     */
    template <class E, class C, XTL_REQUIRES(xtl::negation<xtl::is_integral<C>>)>
    inline auto tile(E&& e, const C& reps)
    {
        return detail::tile(std::forward<E>(e), reps);
    }
+
+    /**
+     * @brief Tile an array.
+     *
+     * @param e The array.
+     * @param reps The number of repetitions of A along the first axis.
+     * @return The tiled array.
+     */
+    template <class E, class S = typename std::decay_t<E>::size_type, XTL_REQUIRES(xtl::is_integral<S>)>
+    inline auto tile(E&& e, S reps)
+    {
+        std::vector<S> tw(e.shape().size(), static_cast<S>(1));
+        tw[0] = reps;
+        return detail::tile(std::forward<E>(e), tw);
+    }
+}
+
+#endif

+ 1007 - 0
3rd/numpy/include/xtensor/xrandom.hpp

@@ -0,0 +1,1007 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+/**
+ * @brief functions to obtain xgenerators generating random numbers with given shape
+ */
+
+#ifndef XTENSOR_RANDOM_HPP
+#define XTENSOR_RANDOM_HPP
+
+#include <algorithm>
+#include <functional>
+#include <random>
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xspan.hpp>
+
+#include "xbuilder.hpp"
+#include "xgenerator.hpp"
+#include "xindex_view.hpp"
+#include "xmath.hpp"
+#include "xtensor.hpp"
+#include "xtensor_config.hpp"
+#include "xview.hpp"
+
+namespace xt
+{
    /*********************
     * Random generators *
     *********************/

    // Forward declarations of the xt::random API. Every generator takes the
    // number type T explicitly, a result shape, distribution parameters with
    // defaults, and an engine defaulting to the process-wide default engine.
    namespace random
    {
        using default_engine_type = std::mt19937;
        using seed_type = default_engine_type::result_type;

        default_engine_type& get_default_random_engine();
        void seed(seed_type seed);

        // --- overloads taking the shape as a container ---

        template <class T, class S, class E = random::default_engine_type>
        auto rand(const S& shape, T lower = 0, T upper = 1, E& engine = random::get_default_random_engine());

        template <class T, class S, class E = random::default_engine_type>
        auto randint(
            const S& shape,
            T lower = 0,
            T upper = (std::numeric_limits<T>::max)(),
            E& engine = random::get_default_random_engine()
        );

        template <class T, class S, class E = random::default_engine_type>
        auto randn(const S& shape, T mean = 0, T std_dev = 1, E& engine = random::get_default_random_engine());

        template <class T, class S, class D = double, class E = random::default_engine_type>
        auto
        binomial(const S& shape, T trials = 1, D prob = 0.5, E& engine = random::get_default_random_engine());

        template <class T, class S, class D = double, class E = random::default_engine_type>
        auto geometric(const S& shape, D prob = 0.5, E& engine = random::get_default_random_engine());

        template <class T, class S, class D = double, class E = random::default_engine_type>
        auto
        negative_binomial(const S& shape, T k = 1, D prob = 0.5, E& engine = random::get_default_random_engine());

        template <class T, class S, class D = double, class E = random::default_engine_type>
        auto poisson(const S& shape, D rate = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class S, class E = random::default_engine_type>
        auto exponential(const S& shape, T rate = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class S, class E = random::default_engine_type>
        auto
        gamma(const S& shape, T alpha = 1.0, T beta = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class S, class E = random::default_engine_type>
        auto weibull(const S& shape, T a = 1.0, T b = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class S, class E = random::default_engine_type>
        auto
        extreme_value(const S& shape, T a = 0.0, T b = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class S, class E = random::default_engine_type>
        auto
        lognormal(const S& shape, T mean = 0, T std_dev = 1, E& engine = random::get_default_random_engine());

        template <class T, class S, class E = random::default_engine_type>
        auto chi_squared(const S& shape, T deg = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class S, class E = random::default_engine_type>
        auto cauchy(const S& shape, T a = 0.0, T b = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class S, class E = random::default_engine_type>
        auto fisher_f(const S& shape, T m = 1.0, T n = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class S, class E = random::default_engine_type>
        auto student_t(const S& shape, T n = 1.0, E& engine = random::get_default_random_engine());

        // --- overloads taking the shape as a braced built-in array,
        //     e.g. xt::random::rand<double>({3, 4}) ---

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto
        rand(const I (&shape)[L], T lower = 0, T upper = 1, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto randint(
            const I (&shape)[L],
            T lower = 0,
            T upper = (std::numeric_limits<T>::max)(),
            E& engine = random::get_default_random_engine()
        );

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto
        randn(const I (&shape)[L], T mean = 0, T std_dev = 1, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class D = double, class E = random::default_engine_type>
        auto
        binomial(const I (&shape)[L], T trials = 1, D prob = 0.5, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class D = double, class E = random::default_engine_type>
        auto geometric(const I (&shape)[L], D prob = 0.5, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class D = double, class E = random::default_engine_type>
        auto negative_binomial(
            const I (&shape)[L],
            T k = 1,
            D prob = 0.5,
            E& engine = random::get_default_random_engine()
        );

        template <class T, class I, std::size_t L, class D = double, class E = random::default_engine_type>
        auto poisson(const I (&shape)[L], D rate = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto exponential(const I (&shape)[L], T rate = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto
        gamma(const I (&shape)[L], T alpha = 1.0, T beta = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto
        weibull(const I (&shape)[L], T a = 1.0, T b = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto
        extreme_value(const I (&shape)[L], T a = 0.0, T b = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto lognormal(
            const I (&shape)[L],
            T mean = 0.0,
            T std_dev = 1.0,
            E& engine = random::get_default_random_engine()
        );

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto chi_squared(const I (&shape)[L], T deg = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto cauchy(const I (&shape)[L], T a = 0.0, T b = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto
        fisher_f(const I (&shape)[L], T m = 1.0, T n = 1.0, E& engine = random::get_default_random_engine());

        template <class T, class I, std::size_t L, class E = random::default_engine_type>
        auto student_t(const I (&shape)[L], T n = 1.0, E& engine = random::get_default_random_engine());

        // --- in-place shuffling, permutations and sampling ---

        template <class T, class E = random::default_engine_type>
        void shuffle(xexpression<T>& e, E& engine = random::get_default_random_engine());

        template <class T, class E = random::default_engine_type>
        std::enable_if_t<xtl::is_integral<T>::value, xtensor<T, 1>>
        permutation(T e, E& engine = random::get_default_random_engine());

        template <class T, class E = random::default_engine_type>
        std::enable_if_t<is_xexpression<std::decay_t<T>>::value, std::decay_t<T>>
        permutation(T&& e, E& engine = random::get_default_random_engine());

        template <class T, class E = random::default_engine_type>
        xtensor<typename T::value_type, 1> choice(
            const xexpression<T>& e,
            std::size_t n,
            bool replace = true,
            E& engine = random::get_default_random_engine()
        );

        template <class T, class W, class E = random::default_engine_type>
        xtensor<typename T::value_type, 1> choice(
            const xexpression<T>& e,
            std::size_t n,
            const xexpression<W>& weights,
            bool replace = true,
            E& engine = random::get_default_random_engine()
        );
    }
+
    namespace detail
    {
        // Functor plugged into an xgenerator: every element access draws a
        // fresh number from the wrapped distribution D using engine E.
        //
        // NOTE(review): the produced values therefore depend on the order in
        // which elements are visited, and accessing the same element twice
        // yields different numbers — the generator is lazy, not materialized.
        template <class T, class E, class D>
        struct random_impl
        {
            using value_type = T;

            random_impl(E& engine, D&& dist)
                : m_engine(engine)
                , m_dist(std::move(dist))
            {
            }

            // n-dimensional access: indices are ignored, each call is a draw
            template <class... Args>
            inline value_type operator()(Args...) const
            {
                return m_dist(m_engine);
            }

            // iterator-pair access: the index range is ignored as well
            template <class It>
            inline value_type element(It, It) const
            {
                return m_dist(m_engine);
            }

            // Bulk assignment: fills the destination's storage directly.
            template <class EX>
            inline void assign_to(xexpression<EX>& e) const noexcept
            {
                // Note: we're not going row/col major here
                auto& ed = e.derived_cast();
                for (auto&& el : ed.storage())
                {
                    el = m_dist(m_engine);
                }
            }

        private:

            // held by reference: the engine must outlive this functor
            E& m_engine;
            // mutable so the const access operators above can advance the
            // distribution's internal state
            mutable D m_dist;
        };
    }
+
+    namespace random
+    {
+        /**
+         * Returns a reference to the default random number engine
+         */
+        inline default_engine_type& get_default_random_engine()
+        {
+            static default_engine_type mt;
+            return mt;
+        }
+
+        /**
+         * Seeds the default random number generator with @p seed
+         * @param seed The seed
+         */
+        inline void seed(seed_type seed)
+        {
+            get_default_random_engine().seed(seed);
+        }
+
+        /**
+         * xexpression with specified @p shape containing uniformly distributed random numbers
+         * in the interval from @p lower to @p upper, excluding upper.
+         *
+         * Numbers are drawn from @c std::uniform_real_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param lower lower bound
+         * @param upper upper bound
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto rand(const S& shape, T lower, T upper, E& engine)
+        {
+            std::uniform_real_distribution<T> dist(lower, upper);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
        /**
         * xexpression with specified @p shape containing uniformly distributed
         * random integers in the interval from @p lower to @p upper, excluding upper.
         *
         * Numbers are drawn from @c std::uniform_int_distribution.
         *
         * NOTE(review): requires lower < upper — otherwise the distribution's
         * precondition a <= b is violated (undefined behavior). Also, the
         * standard defines std::uniform_int_distribution only for short, int,
         * long, long long and their unsigned counterparts; instantiating with
         * char / int8_t etc. is undefined behavior.
         *
         * @param shape shape of resulting xexpression
         * @param lower lower bound
         * @param upper upper bound
         * @param engine random number engine
         * @tparam T number type to use
         */
        template <class T, class S, class E>
        inline auto randint(const S& shape, T lower, T upper, E& engine)
        {
            // upper is exclusive while the distribution bounds are inclusive,
            // hence upper - 1
            std::uniform_int_distribution<T> dist(lower, T(upper - 1));
            return detail::make_xgenerator(
                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
                shape
            );
        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * the Normal (Gaussian) random number distribution with mean @p mean and
+         * standard deviation @p std_dev.
+         *
+         * Numbers are drawn from @c std::normal_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param mean mean of normal distribution
+         * @param std_dev standard deviation of normal distribution
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto randn(const S& shape, T mean, T std_dev, E& engine)
+        {
+            std::normal_distribution<T> dist(mean, std_dev);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * the binomial random number distribution for @p trials trials with
+         * probability of success equal to @p prob.
+         *
+         * Numbers are drawn from @c std::binomial_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param trials number of Bernoulli trials
+         * @param prob probability of success of each trial
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class D, class E>
+        inline auto binomial(const S& shape, T trials, D prob, E& engine)
+        {
+            std::binomial_distribution<T> dist(trials, prob);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * a gemoetric random number distribution with
+         * probability of success equal to @p prob for each of the Bernoulli trials.
+         *
+         * Numbers are drawn from @c std::geometric_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param prob probability of success of each trial
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class D, class E>
+        inline auto geometric(const S& shape, D prob, E& engine)
+        {
+            std::geometric_distribution<T> dist(prob);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * a negative binomial random number distribution (also known as Pascal distribution)
+         * that returns the number of successes before @p k trials with probability of success
+         * equal to @p prob for each of the Bernoulli trials.
+         *
+         * Numbers are drawn from @c std::negative_binomial_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param k number of unsuccessful trials
+         * @param prob probability of success of each trial
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class D, class E>
+        inline auto negative_binomial(const S& shape, T k, D prob, E& engine)
+        {
+            std::negative_binomial_distribution<T> dist(k, prob);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * a Poisson random number distribution with rate @p rate
+         *
+         * Numbers are drawn from @c std::poisson_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param rate rate of Poisson distribution
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class D, class E>
+        inline auto poisson(const S& shape, D rate, E& engine)
+        {
+            std::poisson_distribution<T> dist(rate);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        // NOTE(review): each generator below constructs a <random> distribution,
+        // moves it into a lazy xgenerator, and draws from `engine` only when
+        // elements of the resulting expression are accessed.
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * an exponential random number distribution with rate @p rate
+         *
+         * Numbers are drawn from @c std::exponential_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param rate rate of exponential distribution
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto exponential(const S& shape, T rate, E& engine)
+        {
+            std::exponential_distribution<T> dist(rate);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * a gamma random number distribution with shape @p alpha and scale @p beta
+         *
+         * Numbers are drawn from @c std::gamma_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param alpha shape of the gamma distribution
+         * @param beta scale of the gamma distribution
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto gamma(const S& shape, T alpha, T beta, E& engine)
+        {
+            std::gamma_distribution<T> dist(alpha, beta);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * a Weibull random number distribution with shape @p a and scale @p b
+         *
+         * Numbers are drawn from @c std::weibull_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param a shape of the weibull distribution
+         * @param b scale of the weibull distribution
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto weibull(const S& shape, T a, T b, E& engine)
+        {
+            std::weibull_distribution<T> dist(a, b);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * an extreme value random number distribution with shape @p a and scale @p b
+         *
+         * Numbers are drawn from @c std::extreme_value_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param a shape of the extreme value distribution
+         * @param b scale of the extreme value distribution
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto extreme_value(const S& shape, T a, T b, E& engine)
+        {
+            std::extreme_value_distribution<T> dist(a, b);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * the Log-Normal random number distribution with mean @p mean and
+         * standard deviation @p std_dev.
+         *
+         * Numbers are drawn from @c std::lognormal_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param mean mean of normal distribution
+         * @param std_dev standard deviation of normal distribution
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto lognormal(const S& shape, T mean, T std_dev, E& engine)
+        {
+            std::lognormal_distribution<T> dist(mean, std_dev);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * the chi-squared random number distribution with @p deg degrees of freedom.
+         *
+         * Numbers are drawn from @c std::chi_squared_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param deg degrees of freedom
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto chi_squared(const S& shape, T deg, E& engine)
+        {
+            std::chi_squared_distribution<T> dist(deg);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * a Cauchy random number distribution with peak @p a and scale @p b
+         *
+         * Numbers are drawn from @c std::cauchy_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param a peak of the Cauchy distribution
+         * @param b scale of the Cauchy distribution
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto cauchy(const S& shape, T a, T b, E& engine)
+        {
+            std::cauchy_distribution<T> dist(a, b);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * a Fisher-f random number distribution with numerator degrees of
+         * freedom equal to @p m and denominator degrees of freedom equal to @p n
+         *
+         * Numbers are drawn from @c std::fisher_f_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param m numerator degrees of freedom
+         * @param n denominator degrees of freedom
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto fisher_f(const S& shape, T m, T n, E& engine)
+        {
+            std::fisher_f_distribution<T> dist(m, n);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * xexpression with specified @p shape containing numbers sampled from
+         * a Student-t random number distribution with degrees of
+         * freedom equal to @p n
+         *
+         * Numbers are drawn from @c std::student_t_distribution.
+         *
+         * @param shape shape of resulting xexpression
+         * @param n degrees of freedom
+         * @param engine random number engine
+         * @tparam T number type to use
+         */
+        template <class T, class S, class E>
+        inline auto student_t(const S& shape, T n, E& engine)
+        {
+            std::student_t_distribution<T> dist(n);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        // Overloads taking the shape as a braced initializer / C-array
+        // (e.g. `rand<double>({3, 4}, ...)`). They mirror the documented
+        // container-shape overloads above; see those for parameter semantics.
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto rand(const I (&shape)[L], T lower, T upper, E& engine)
+        {
+            std::uniform_real_distribution<T> dist(lower, upper);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto randint(const I (&shape)[L], T lower, T upper, E& engine)
+        {
+            // uniform_int_distribution is inclusive on both ends, so `upper - 1`
+            // yields the half-open interval [lower, upper).
+            std::uniform_int_distribution<T> dist(lower, T(upper - 1));
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto randn(const I (&shape)[L], T mean, T std_dev, E& engine)
+        {
+            std::normal_distribution<T> dist(mean, std_dev);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class D, class E>
+        inline auto binomial(const I (&shape)[L], T trials, D prob, E& engine)
+        {
+            std::binomial_distribution<T> dist(trials, prob);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class D, class E>
+        inline auto geometric(const I (&shape)[L], D prob, E& engine)
+        {
+            std::geometric_distribution<T> dist(prob);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class D, class E>
+        inline auto negative_binomial(const I (&shape)[L], T k, D prob, E& engine)
+        {
+            std::negative_binomial_distribution<T> dist(k, prob);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class D, class E>
+        inline auto poisson(const I (&shape)[L], D rate, E& engine)
+        {
+            std::poisson_distribution<T> dist(rate);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto exponential(const I (&shape)[L], T rate, E& engine)
+        {
+            std::exponential_distribution<T> dist(rate);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto gamma(const I (&shape)[L], T alpha, T beta, E& engine)
+        {
+            std::gamma_distribution<T> dist(alpha, beta);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto weibull(const I (&shape)[L], T a, T b, E& engine)
+        {
+            std::weibull_distribution<T> dist(a, b);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto extreme_value(const I (&shape)[L], T a, T b, E& engine)
+        {
+            std::extreme_value_distribution<T> dist(a, b);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto lognormal(const I (&shape)[L], T mean, T std_dev, E& engine)
+        {
+            std::lognormal_distribution<T> dist(mean, std_dev);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto chi_squared(const I (&shape)[L], T deg, E& engine)
+        {
+            std::chi_squared_distribution<T> dist(deg);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto cauchy(const I (&shape)[L], T a, T b, E& engine)
+        {
+            std::cauchy_distribution<T> dist(a, b);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto fisher_f(const I (&shape)[L], T m, T n, E& engine)
+        {
+            std::fisher_f_distribution<T> dist(m, n);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        template <class T, class I, std::size_t L, class E>
+        inline auto student_t(const I (&shape)[L], T n, E& engine)
+        {
+            std::student_t_distribution<T> dist(n);
+            return detail::make_xgenerator(
+                detail::random_impl<T, E, decltype(dist)>(engine, std::move(dist)),
+                shape
+            );
+        }
+
+        /**
+         * Randomly shuffle elements inplace in xcontainer along first axis.
+         * The order of sub-arrays is changed but their contents remain the same.
+         *
+         * Uses the Fisher-Yates algorithm: walk from the last position down,
+         * swapping each element with a uniformly chosen one at or before it.
+         *
+         * NOTE(review): for an empty container, `(last - first) - 1` (and
+         * `de.shape()[0] - 1`) wraps around in unsigned arithmetic — confirm
+         * callers never pass size-0 input.
+         *
+         * @param e xcontainer to shuffle inplace
+         * @param engine random number engine
+         */
+        template <class T, class E>
+        void shuffle(xexpression<T>& e, E& engine)
+        {
+            T& de = e.derived_cast();
+
+            if (de.dimension() == 1)
+            {
+                // 1-D: swap scalar elements directly through iterators.
+                using size_type = typename T::size_type;
+                auto first = de.begin();
+                auto last = de.end();
+
+                for (size_type i = std::size_t((last - first) - 1); i > 0; --i)
+                {
+                    std::uniform_int_distribution<size_type> dist(0, i);
+                    auto j = dist(engine);
+                    using std::swap;
+                    swap(first[i], first[j]);
+                }
+            }
+            else
+            {
+                // N-D: swap whole sub-arrays along axis 0 via a temporary
+                // buffer shaped like one row (views cannot be swapped directly).
+                using size_type = typename T::size_type;
+                decltype(auto) buf = empty_like(view(de, 0));
+
+                for (size_type i = de.shape()[0] - 1; i > 0; --i)
+                {
+                    std::uniform_int_distribution<size_type> dist(0, i);
+                    size_type j = dist(engine);
+
+                    buf = view(de, j);
+                    view(de, j) = view(de, i);
+                    view(de, i) = buf;
+                }
+            }
+        }
+
+        /**
+         * Randomly permute a sequence, or return a permuted range.
+         *
+         * If the first parameter is an integer, this function creates a new
+         * ``arange(e)`` and returns it randomly permuted. Otherwise, this
+         * function creates a copy of the input, passes it to @sa shuffle and
+         * returns the result.
+         *
+         * @param e input xexpression or integer
+         * @param engine random number engine to use (optional)
+         *
+         * @return randomly permuted copy of container or arange.
+         */
+        template <class T, class E>
+        std::enable_if_t<xtl::is_integral<T>::value, xtensor<T, 1>> permutation(T e, E& engine)
+        {
+            // Integer overload: permute [0, e).
+            xt::xtensor<T, 1> res = xt::arange<T>(e);
+            shuffle(res, engine);
+            return res;
+        }
+
+        /// @cond DOXYGEN_INCLUDE_SFINAE
+        template <class T, class E>
+        std::enable_if_t<is_xexpression<std::decay_t<T>>::value, std::decay_t<T>> permutation(T&& e, E& engine)
+        {
+            // Expression overload: evaluate into a concrete copy, then shuffle
+            // that copy in place so the input is left untouched.
+            using copy_type = std::decay_t<T>;
+            copy_type res = e;
+            shuffle(res, engine);
+            return res;
+        }
+
+        /// @endcond
+
+        /**
+         * Randomly select n unique elements from xexpression e.
+         * Note: this function makes a copy of your data, and only 1D data is accepted.
+         *
+         * @param e expression to sample from
+         * @param n number of elements to sample
+         * @param replace whether to sample with or without replacement
+         * @param engine random number engine
+         *
+         * @return xtensor containing 1D container of sampled elements
+         */
+        template <class T, class E>
+        xtensor<typename T::value_type, 1>
+        choice(const xexpression<T>& e, std::size_t n, bool replace, E& engine)
+        {
+            const auto& de = e.derived_cast();
+            XTENSOR_ASSERT((de.dimension() == 1));
+            // Without replacement we cannot draw more than the population size.
+            XTENSOR_ASSERT((replace || n <= de.size()));
+            using result_type = xtensor<typename T::value_type, 1>;
+            using size_type = typename result_type::size_type;
+            result_type result;
+            result.resize({n});
+
+            if (replace)
+            {
+                // With replacement: n independent uniform index draws.
+                auto dist = std::uniform_int_distribution<size_type>(0, de.size() - 1);
+                for (size_type i = 0; i < n; ++i)
+                {
+                    result[i] = de.storage()[dist(engine)];
+                }
+            }
+            else
+            {
+                // Naive reservoir sampling without weighting: seed the reservoir
+                // with the first n elements, then replace entries with decreasing
+                // probability n/(i+1) as the rest of the stream is scanned.
+                // NOTE(review): the sampled elements are not additionally
+                // shuffled, so their order is not uniformly random.
+                std::copy(de.storage().begin(), de.storage().begin() + n, result.begin());
+                size_type i = n;
+                for (auto it = de.storage().begin() + n; it != de.storage().end(); ++it, ++i)
+                {
+                    auto idx = std::uniform_int_distribution<size_type>(0, i)(engine);
+                    if (idx < n)
+                    {
+                        result.storage()[idx] = *it;
+                    }
+                }
+            }
+            return result;
+        }
+
+        /**
+         * Weighted random sampling.
+         *
+         * Randomly sample n unique elements from xexpression ``e`` using the discrete distribution
+         * parametrized by the weights ``w``. When sampling with replacement, this means that the probability
+         * to sample element ``e[i]`` is defined as
+         * ``w[i] / sum(w)``.
+         * Without replacement, this only describes the probability of the first sample element.
+         * In successive samples, the weight of items already sampled is assumed to be zero.
+         *
+         * For weighted random sampling with replacement, binary search with cumulative weights algorithm is
+         * used. For weighted random sampling without replacement, the algorithm used is the exponential sort
+         * from [Efraimidis and Spirakis](https://doi.org/10.1016/j.ipl.2005.11.003) (2006) with the ``weight
+         * / randexp(1)`` [trick](https://web.archive.org/web/20201021162211/https://krlmlr.github.io/wrswoR/)
+         * from Kirill Müller.
+         *
+         * Note: this function makes a copy of your data, and only 1D data is accepted.
+         *
+         * @param e expression to sample from
+         * @param n number of elements to sample
+         * @param w expression for the weight distribution.
+         *          Weights must be positive and real-valued but need not sum to 1.
+         * @param replace set true to sample with replacement
+         * @param engine random number engine
+         *
+         * @return xtensor containing 1D container of sampled elements
+         */
+        template <class T, class W, class E>
+        xtensor<typename T::value_type, 1>
+        choice(const xexpression<T>& e, std::size_t n, const xexpression<W>& weights, bool replace, E& engine)
+        {
+            const auto& de = e.derived_cast();
+            const auto& dweights = weights.derived_cast();
+            XTENSOR_ASSERT((de.dimension() == 1));
+            XTENSOR_ASSERT((replace || n <= de.size()));
+            XTENSOR_ASSERT((de.size() == dweights.size()));
+            XTENSOR_ASSERT((de.dimension() == dweights.dimension()));
+            XTENSOR_ASSERT(xt::all(dweights >= 0));
+            static_assert(
+                std::is_floating_point<typename W::value_type>::value,
+                "Weight expression must be of floating point type"
+            );
+            using result_type = xtensor<typename T::value_type, 1>;
+            using size_type = typename result_type::size_type;
+            using weight_type = typename W::value_type;
+            result_type result;
+            result.resize({n});
+
+            if (replace)
+            {
+                // Sample u uniformly in the range [0, sum(weights)[
+                // The index idx of the sampled element is such that weight_cumul[idx - 1] <= u <
+                // weight_cumul[idx]. Where weight_cumul[-1] is implicitly 0, as the empty sum.
+                const auto wc = eval(cumsum(dweights));
+                std::uniform_real_distribution<weight_type> weight_dist{0, wc[wc.size() - 1]};
+                for (auto& x : result)
+                {
+                    const auto u = weight_dist(engine);
+                    // upper_bound gives the first cumulative weight strictly
+                    // greater than u, i.e. the bin u falls into.
+                    const auto idx = static_cast<size_type>(
+                        std::upper_bound(wc.cbegin(), wc.cend(), u) - wc.cbegin()
+                    );
+                    x = de[idx];
+                }
+            }
+            else
+            {
+                // Compute (modified) keys as weight/randexp(1).
+                xtensor<weight_type, 1> keys;
+                keys.resize({dweights.size()});
+                std::exponential_distribution<weight_type> randexp{weight_type(1)};
+                std::transform(
+                    dweights.cbegin(),
+                    dweights.cend(),
+                    keys.begin(),
+                    [&randexp, &engine](auto w)
+                    {
+                        return w / randexp(engine);
+                    }
+                );
+
+                // Find indexes for the n biggest key
+                // (partial_sort: only the first n positions need ordering).
+                xtensor<size_type, 1> indices = arange<size_type>(0, dweights.size());
+                std::partial_sort(
+                    indices.begin(),
+                    indices.begin() + n,
+                    indices.end(),
+                    [&keys](auto i, auto j)
+                    {
+                        return keys[i] > keys[j];
+                    }
+                );
+
+                // Return samples with the n biggest keys
+                result = index_view(de, xtl::span<size_type>{indices.data(), n});
+            }
+            return result;
+        }
+
+    }
+}
+
+#endif

+ 1903 - 0
3rd/numpy/include/xtensor/xreducer.hpp

@@ -0,0 +1,1903 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_REDUCER_HPP
+#define XTENSOR_REDUCER_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <initializer_list>
+#include <iterator>
+#include <stdexcept>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xfunctional.hpp>
+#include <xtl/xsequence.hpp>
+
+#include "xaccessible.hpp"
+#include "xbuilder.hpp"
+#include "xeval.hpp"
+#include "xexpression.hpp"
+#include "xgenerator.hpp"
+#include "xiterable.hpp"
+#include "xtensor_config.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+    // Combine two reducer-option tuples (evaluation strategies) with `|`,
+    // e.g. `evaluation_strategy::immediate | keep_dims`, by concatenating them.
+    template <template <class...> class A, class... AX, class X, XTL_REQUIRES(is_evaluation_strategy<AX>..., is_evaluation_strategy<X>)>
+    auto operator|(const A<AX...>& args, const A<X>& rhs)
+    {
+        return std::tuple_cat(args, rhs);
+    }
+
+    // Tag option: keep reduced axes in the result shape with extent 1
+    // (numpy's `keepdims=True`).
+    struct keep_dims_type : xt::detail::option_base
+    {
+    };
+
+    // Option instance passed by users, wrapped in a tuple so it can be
+    // combined with other options via tuple concatenation.
+    constexpr auto keep_dims = std::tuple<keep_dims_type>{};
+
+    // Option carrying an explicit initial value for the reduction accumulator.
+    template <class T = double>
+    struct xinitial : xt::detail::option_base
+    {
+        constexpr xinitial(T val)
+            : m_val(val)
+        {
+        }
+
+        constexpr T value() const
+        {
+            return m_val;
+        }
+
+        T m_val;
+    };
+
+    // Helper producing an option tuple holding an initial value, e.g.
+    // `xt::sum(a, {0}, xt::initial(1.0))`.
+    template <class T>
+    constexpr auto initial(T val)
+    {
+        return std::make_tuple(xinitial<T>(val));
+    }
+
+    // Compile-time search for type T inside a std::tuple; yields the index of
+    // the first occurrence or -1 when absent.
+    template <std::ptrdiff_t I, class T, class Tuple>
+    struct tuple_idx_of_impl;
+
+    // Base case: exhausted the tuple without finding T -> sentinel -1.
+    template <std::ptrdiff_t I, class T>
+    struct tuple_idx_of_impl<I, T, std::tuple<>>
+    {
+        static constexpr std::ptrdiff_t value = -1;
+    };
+
+    // Match: head of the tuple is T -> current index I.
+    template <std::ptrdiff_t I, class T, class... Types>
+    struct tuple_idx_of_impl<I, T, std::tuple<T, Types...>>
+    {
+        static constexpr std::ptrdiff_t value = I;
+    };
+
+    // Mismatch: recurse on the tail with the index advanced.
+    template <std::ptrdiff_t I, class T, class U, class... Types>
+    struct tuple_idx_of_impl<I, T, std::tuple<U, Types...>>
+    {
+        static constexpr std::ptrdiff_t value = tuple_idx_of_impl<I + 1, T, std::tuple<Types...>>::value;
+    };
+
+    // Apply std::decay to every element type of a template instantiation,
+    // so cv/ref-qualified tuple elements still compare equal to T below.
+    template <class S, class... X>
+    struct decay_all;
+
+    template <template <class...> class S, class... X>
+    struct decay_all<S<X...>>
+    {
+        using type = S<std::decay_t<X>...>;
+    };
+
+    // Public entry: index of std::decay_t<T> in the decayed Tuple, or -1.
+    template <class T, class Tuple>
+    struct tuple_idx_of
+    {
+        static constexpr std::ptrdiff_t
+            value = tuple_idx_of_impl<0, std::decay_t<T>, typename decay_all<Tuple>::type>::value;
+    };
+
+    // Parsed view over a reducer option tuple T: extracts the evaluation
+    // strategy (lazy/immediate), the keep_dims flag and an optional initial
+    // value of result type R, all resolved at compile time.
+    template <class R, class T>
+    struct reducer_options
+    {
+        // Predicate used with mpl::find_if to locate an xinitial<X> element.
+        template <class X>
+        struct initial_tester : std::false_type
+        {
+        };
+
+        template <class X>
+        struct initial_tester<xinitial<X>> : std::true_type
+        {
+        };
+
+        // Workaround for Apple because tuple_cat is buggy!
+        template <class X>
+        struct initial_tester<const xinitial<X>> : std::true_type
+        {
+        };
+
+        using d_t = std::decay_t<T>;
+
+        // Index of the xinitial element, or tuple_size when absent.
+        static constexpr std::size_t initial_val_idx = xtl::mpl::find_if<initial_tester, d_t>::value;
+        reducer_options() = default;
+
+        reducer_options(const T& tpl)
+        {
+            // static_if compiles only the branch that applies; the inner
+            // `no_compile` wrapper and the clamped `std::get< idx ? idx : 0 >`
+            // index keep the dead branch instantiable when no initial value
+            // is present (idx would otherwise be out of bounds).
+            xtl::mpl::static_if<initial_val_idx != std::tuple_size<T>::value>(
+                [this, &tpl](auto no_compile)
+                {
+                    // use no_compile to prevent compilation if initial_val_idx is out of bounds!
+                    this->initial_value = no_compile(
+                                              std::get < initial_val_idx != std::tuple_size<T>::value
+                                                  ? initial_val_idx
+                                                  : 0 > (tpl)
+                    )
+                                              .value();
+                },
+                [](auto /*np_compile*/) {}
+            );
+        }
+
+        // Immediate if the option tuple contains immediate_type, else lazy.
+        using evaluation_strategy = std::conditional_t<
+            tuple_idx_of<xt::evaluation_strategy::immediate_type, d_t>::value != -1,
+            xt::evaluation_strategy::immediate_type,
+            xt::evaluation_strategy::lazy_type>;
+
+        using keep_dims = std::
+            conditional_t<tuple_idx_of<xt::keep_dims_type, d_t>::value != -1, std::true_type, std::false_type>;
+
+        static constexpr bool has_initial_value = initial_val_idx != std::tuple_size<d_t>::value;
+
+        R initial_value;
+
+        // Rebind to a different result type NR, keeping the same option tuple.
+        template <class NR>
+        using rebind_t = reducer_options<NR, T>;
+
+        template <class NR>
+        auto rebind(NR initial, const reducer_options<R, T>&) const
+        {
+            reducer_options<NR, T> res;
+            res.initial_value = initial;
+            return res;
+        }
+    };
+
+    // Trait: is T (after decay) a std::tuple of reducer options?
+    template <class T>
+    struct is_reducer_options_impl : std::false_type
+    {
+    };
+
+    template <class... X>
+    struct is_reducer_options_impl<std::tuple<X...>> : std::true_type
+    {
+    };
+
+    template <class T>
+    struct is_reducer_options : is_reducer_options_impl<std::decay_t<T>>
+    {
+    };
+
+    /**********
+     * reduce *
+     **********/
+
+#define DEFAULT_STRATEGY_REDUCERS std::tuple<evaluation_strategy::lazy_type>
+
+    template <class ST, class X, class KD = std::false_type>
+    struct xreducer_shape_type;
+
+    template <class S1, class S2>
+    struct fixed_xreducer_shape_type;
+
+    namespace detail
+    {
+        // Compute the result shape of a reduction over `axes` and resize
+        // `result` accordingly. With keep_dims, reduced axes stay with extent
+        // 1; otherwise they are dropped. Enabled only for dynamic shapes.
+        template <class O, class RS, class R, class E, class AX>
+        inline void shape_computation(
+            RS& result_shape,
+            R& result,
+            E& expr,
+            const AX& axes,
+            std::enable_if_t<!detail::is_fixed<RS>::value, int> = 0
+        )
+        {
+            if (typename O::keep_dims())
+            {
+                resize_container(result_shape, expr.dimension());
+                for (std::size_t i = 0; i < expr.dimension(); ++i)
+                {
+                    if (std::find(axes.begin(), axes.end(), i) == axes.end())
+                    {
+                        // i not in axes!
+                        result_shape[i] = expr.shape()[i];
+                    }
+                    else
+                    {
+                        result_shape[i] = 1;
+                    }
+                }
+            }
+            else
+            {
+                resize_container(result_shape, expr.dimension() - axes.size());
+                for (std::size_t i = 0, idx = 0; i < expr.dimension(); ++i)
+                {
+                    if (std::find(axes.begin(), axes.end(), i) == axes.end())
+                    {
+                        // i not in axes!
+                        result_shape[idx] = expr.shape()[i];
+                        ++idx;
+                    }
+                }
+            }
+            result.resize(result_shape, expr.layout());
+        }
+
+        // skip shape computation if already done at compile time
+        template <class O, class RS, class R, class S, class AX>
+        inline void
+        shape_computation(RS&, R&, const S&, const AX&, std::enable_if_t<detail::is_fixed<RS>::value, int> = 0)
+        {
+        }
+    }
+
+    // Copy a (zero-axis) reduction input into the result buffer, walking the
+    // source in its own layout so the copy is linear. This overload applies
+    // when the value types are convertible and no functor call is needed.
+    template <class F, class E, class R, XTL_REQUIRES(std::is_convertible<typename E::value_type, typename R::value_type>)>
+    inline void copy_to_reduced(F&, const E& e, R& result)
+    {
+        if (e.layout() == layout_type::row_major)
+        {
+            std::copy(
+                e.template cbegin<layout_type::row_major>(),
+                e.template cend<layout_type::row_major>(),
+                result.data()
+            );
+        }
+        else
+        {
+            std::copy(
+                e.template cbegin<layout_type::column_major>(),
+                e.template cend<layout_type::column_major>(),
+                result.data()
+            );
+        }
+    }
+
+    // Fallback overload: value types are not convertible, so each element is
+    // mapped through the functor `f` while copying.
+    template <
+        class F,
+        class E,
+        class R,
+        XTL_REQUIRES(xtl::negation<std::is_convertible<typename E::value_type, typename R::value_type>>)>
+    inline void copy_to_reduced(F& f, const E& e, R& result)
+    {
+        if (e.layout() == layout_type::row_major)
+        {
+            std::transform(
+                e.template cbegin<layout_type::row_major>(),
+                e.template cend<layout_type::row_major>(),
+                result.data(),
+                f
+            );
+        }
+        else
+        {
+            std::transform(
+                e.template cbegin<layout_type::column_major>(),
+                e.template cend<layout_type::column_major>(),
+                result.data(),
+                f
+            );
+        }
+    }
+
+    // Eagerly evaluates a reduction: allocates the result container up front and
+    // fills it by walking the (already evaluated) expression `e` directly, rather
+    // than building a lazy xreducer expression.
+    //
+    // `f` bundles the (reduce, init, merge) functors (see xreducer_functors),
+    // `axes` is the list of axes to reduce over (must be sorted, duplicate-free
+    // and in bounds — validated below), and `raw_options` carries the reducer
+    // options (keep_dims, initial value, ...).
+    template <class F, class E, class X, class O>
+    inline auto reduce_immediate(F&& f, E&& e, X&& axes, O&& raw_options)
+    {
+        using reduce_functor_type = typename std::decay_t<F>::reduce_functor_type;
+        using init_functor_type = typename std::decay_t<F>::init_functor_type;
+        using expr_value_type = typename std::decay_t<E>::value_type;
+        // Result element type is whatever reduce(init(), element) yields.
+        using result_type = std::decay_t<decltype(std::declval<reduce_functor_type>()(
+            std::declval<init_functor_type>()(),
+            std::declval<expr_value_type>()
+        ))>;
+
+        using options_t = reducer_options<result_type, std::decay_t<O>>;
+        options_t options(raw_options);
+
+        using shape_type = typename xreducer_shape_type<
+            typename std::decay_t<E>::shape_type,
+            std::decay_t<X>,
+            typename options_t::keep_dims>::type;
+        using result_container_type = typename detail::xtype_for_shape<
+            shape_type>::template type<result_type, std::decay_t<E>::static_layout>;
+        result_container_type result;
+
+        // retrieve functors from triple struct
+        auto reduce_fct = xt::get<0>(f);
+        auto init_fct = xt::get<1>(f);
+        auto merge_fct = xt::get<2>(f);
+
+        // No axes: the "reduction" is an element-wise reduce(init(), v) copy.
+        if (axes.size() == 0)
+        {
+            result.resize(e.shape(), e.layout());
+            auto cpf = [&reduce_fct, &init_fct](const auto& v)
+            {
+                return reduce_fct(static_cast<result_type>(init_fct()), v);
+            };
+            copy_to_reduced(cpf, e, result);
+            return result;
+        }
+
+        shape_type result_shape{};
+        dynamic_shape<std::size_t>
+            iter_shape = xtl::forward_sequence<dynamic_shape<std::size_t>, decltype(e.shape())>(e.shape());
+        dynamic_shape<std::size_t> iter_strides(e.dimension());
+
+        // std::less is used, because as the standard says (24.4.5):
+        // A sequence is sorted with respect to a comparator comp if for any iterator i pointing to the
+        // sequence and any non-negative integer n such that i + n is a valid iterator pointing to an element
+        // of the sequence, comp(*(i + n), *i) == false. Therefore less is required to detect duplicates.
+        if (!std::is_sorted(axes.cbegin(), axes.cend(), std::less<>()))
+        {
+            XTENSOR_THROW(std::runtime_error, "Reducing axes should be sorted.");
+        }
+        if (std::adjacent_find(axes.cbegin(), axes.cend()) != axes.cend())
+        {
+            XTENSOR_THROW(std::runtime_error, "Reducing axes should not contain duplicates.");
+        }
+        if (axes.size() != 0 && axes[axes.size() - 1] > e.dimension() - 1)
+        {
+            XTENSOR_THROW(
+                std::runtime_error,
+                "Axis " + std::to_string(axes[axes.size() - 1]) + " out of bounds for reduction."
+            );
+        }
+
+        detail::shape_computation<options_t>(result_shape, result, e, axes);
+
+        // Fast track for complete reduction
+        if (e.dimension() == axes.size())
+        {
+            result_type tmp = options_t::has_initial_value ? options.initial_value : init_fct();
+            result.data()[0] = std::accumulate(e.storage().begin(), e.storage().end(), tmp, reduce_fct);
+            return result;
+        }
+
+        // Innermost reduced axis (w.r.t. memory order) determines the inner stride.
+        std::size_t leading_ax = axes[(e.layout() == layout_type::row_major) ? axes.size() - 1 : 0];
+        auto strides_finder = e.strides().begin() + static_cast<std::ptrdiff_t>(leading_ax);
+        // The computed strides contain "0" where the shape is 1 -- therefore find the next non-zero number
+        std::size_t inner_stride = static_cast<std::size_t>(*strides_finder);
+        auto iter_bound = e.layout() == layout_type::row_major ? e.strides().begin() : (e.strides().end() - 1);
+        while (inner_stride == 0 && strides_finder != iter_bound)
+        {
+            (e.layout() == layout_type::row_major) ? --strides_finder : ++strides_finder;
+            inner_stride = static_cast<std::size_t>(*strides_finder);
+        }
+
+        // All strides zero: every reduced extent is 1, so fall back to a copy.
+        if (inner_stride == 0)
+        {
+            auto cpf = [&reduce_fct, &init_fct](const auto& v)
+            {
+                return reduce_fct(static_cast<result_type>(init_fct()), v);
+            };
+            copy_to_reduced(cpf, e, result);
+            return result;
+        }
+
+        std::size_t inner_loop_size = static_cast<std::size_t>(inner_stride);
+        std::size_t outer_loop_size = e.shape()[leading_ax];
+
+        // The following code merges reduction axes "at the end" (or the beginning for col_major)
+        // together by increasing the size of the outer loop where appropriate
+        auto merge_loops = [&outer_loop_size, &e](auto it, auto end)
+        {
+            auto last_ax = *it;
+            ++it;
+            for (; it != end; ++it)
+            {
+                // note that we check is_sorted, so this condition is valid
+                if (std::abs(std::ptrdiff_t(*it) - std::ptrdiff_t(last_ax)) == 1)
+                {
+                    last_ax = *it;
+                    outer_loop_size *= e.shape()[last_ax];
+                }
+            }
+            return last_ax;
+        };
+
+        // Map each kept (non-reduced) input dimension to the matching result stride.
+        for (std::size_t i = 0, idx = 0; i < e.dimension(); ++i)
+        {
+            if (std::find(axes.begin(), axes.end(), i) == axes.end())
+            {
+                // i not in axes!
+                iter_strides[i] = static_cast<std::size_t>(result.strides(
+                )[typename options_t::keep_dims() ? i : idx]);
+                ++idx;
+            }
+        }
+
+        if (e.layout() == layout_type::row_major)
+        {
+            std::size_t last_ax = merge_loops(axes.rbegin(), axes.rend());
+
+            iter_shape.erase(iter_shape.begin() + std::ptrdiff_t(last_ax), iter_shape.end());
+            iter_strides.erase(iter_strides.begin() + std::ptrdiff_t(last_ax), iter_strides.end());
+        }
+        else if (e.layout() == layout_type::column_major)
+        {
+            // we got column_major here
+            std::size_t last_ax = merge_loops(axes.begin(), axes.end());
+
+            // erasing the front vs the back
+            iter_shape.erase(iter_shape.begin(), iter_shape.begin() + std::ptrdiff_t(last_ax + 1));
+            iter_strides.erase(iter_strides.begin(), iter_strides.begin() + std::ptrdiff_t(last_ax + 1));
+
+            // and reversing, to make it work with the same next_idx function
+            std::reverse(iter_shape.begin(), iter_shape.end());
+            std::reverse(iter_strides.begin(), iter_strides.end());
+        }
+        else
+        {
+            XTENSOR_THROW(std::runtime_error, "Layout not supported in immediate reduction.");
+        }
+
+        // Odometer-style index advance; returns (done, linear offset into result).
+        xindex temp_idx(iter_shape.size());
+        auto next_idx = [&iter_shape, &iter_strides, &temp_idx]()
+        {
+            std::size_t i = iter_shape.size();
+            for (; i > 0; --i)
+            {
+                if (std::ptrdiff_t(temp_idx[i - 1]) >= std::ptrdiff_t(iter_shape[i - 1]) - 1)
+                {
+                    temp_idx[i - 1] = 0;
+                }
+                else
+                {
+                    temp_idx[i - 1]++;
+                    break;
+                }
+            }
+
+            return std::make_pair(
+                i == 0,
+                std::inner_product(temp_idx.begin(), temp_idx.end(), iter_strides.begin(), std::ptrdiff_t(0))
+            );
+        };
+
+        auto begin = e.data();
+        auto out = result.data();
+        auto out_begin = result.data();
+
+        std::ptrdiff_t next_stride = 0;
+
+        std::pair<bool, std::ptrdiff_t> idx_res(false, 0);
+
+        // Remark: eventually some modifications here to make conditions faster where merge + accumulate is
+        // the same function (e.g. check std::is_same<decltype(merge_fct), decltype(reduce_fct)>::value) ...
+
+        auto merge_border = out;
+        bool merge = false;
+
+        // TODO there could be some performance gain by removing merge checking
+        //      when axes.size() == 1 and even next_idx could be removed for something simpler (next_stride
+        //      always the same) best way to do this would be to create a function that takes (begin, out,
+        //      outer_loop_size, inner_loop_size, next_idx_lambda)
+        // Decide if going about it row-wise or col-wise
+        if (inner_stride == 1)
+        {
+            while (idx_res.first != true)
+            {
+                // for unknown reasons it's much faster to use a temporary variable and
+                // std::accumulate here -- probably some cache behavior
+                result_type tmp = init_fct();
+                tmp = std::accumulate(begin, begin + outer_loop_size, tmp, reduce_fct);
+
+                // use merge function if necessary
+                *out = merge ? merge_fct(*out, tmp) : tmp;
+
+                begin += outer_loop_size;
+
+                idx_res = next_idx();
+                next_stride = idx_res.second;
+                out = out_begin + next_stride;
+
+                if (out > merge_border)
+                {
+                    // looped over once
+                    merge = false;
+                    merge_border = out;
+                }
+                else
+                {
+                    merge = true;
+                }
+            };
+        }
+        else
+        {
+            while (idx_res.first != true)
+            {
+                std::transform(
+                    out,
+                    out + inner_loop_size,
+                    begin,
+                    out,
+                    [merge, &init_fct, &reduce_fct](auto&& v1, auto&& v2)
+                    {
+                        return merge ? reduce_fct(v1, v2) :
+                                     // cast because return type of identity function is not upcasted
+                                   reduce_fct(static_cast<result_type>(init_fct()), v2);
+                    }
+                );
+
+                begin += inner_stride;
+                for (std::size_t i = 1; i < outer_loop_size; ++i)
+                {
+                    std::transform(out, out + inner_loop_size, begin, out, reduce_fct);
+                    begin += inner_stride;
+                }
+
+                idx_res = next_idx();
+                next_stride = idx_res.second;
+                out = out_begin + next_stride;
+
+                if (out > merge_border)
+                {
+                    // looped over once
+                    merge = false;
+                    merge_border = out;
+                }
+                else
+                {
+                    merge = true;
+                }
+            };
+        }
+        // Finally fold the user-supplied initial value into every output element.
+        if (options_t::has_initial_value)
+        {
+            std::transform(
+                result.data(),
+                result.data() + result.size(),
+                result.data(),
+                [&merge_fct, &options](auto&& v)
+                {
+                    return merge_fct(v, options.initial_value);
+                }
+            );
+        }
+        return result;
+    }
+
+    /*********************
+     * xreducer functors *
+     *********************/
+
+    // Nullary functor returning a fixed value; used as the default "init"
+    // functor of a reduction. rebind<NT>() (defined after
+    // detail::const_value_rebinder below) produces a const_value of another type.
+    template <class T>
+    struct const_value
+    {
+        using value_type = T;
+
+        constexpr const_value() = default;
+
+        // NOTE(review): intentionally non-explicit so a plain value can be used
+        // where an init functor is expected.
+        constexpr const_value(T t)
+            : m_value(t)
+        {
+        }
+
+        constexpr T operator()() const
+        {
+            return m_value;
+        }
+
+        template <class NT>
+        using rebind_t = const_value<NT>;
+
+        template <class NT>
+        const_value<NT> rebind() const;
+
+        T m_value;
+    };
+
+    namespace detail
+    {
+        // Maps T to itself (B == false), or to the decayed type produced by
+        // xt::eval(T) (B == true). Used to force evaluation of a value_type
+        // that is itself an xexpression.
+        template <class T, bool B>
+        struct evaluated_value_type
+        {
+            using type = T;
+        };
+
+        template <class T>
+        struct evaluated_value_type<T, true>
+        {
+            using type = typename std::decay_t<decltype(xt::eval(std::declval<T>()))>;
+        };
+
+        template <class T, bool B>
+        using evaluated_value_type_t = typename evaluated_value_type<T, B>::type;
+    }
+
+    // Bundle of the three functors driving a reduction:
+    //  - REDUCE_FUNC: folds an element into the accumulator,
+    //  - INIT_FUNC:   nullary functor producing the initial accumulator value,
+    //  - MERGE_FUNC:  merges two partial accumulators (defaults to REDUCE_FUNC).
+    // Stored as a std::tuple so xt::get<N> can retrieve each functor.
+    template <class REDUCE_FUNC, class INIT_FUNC = const_value<long int>, class MERGE_FUNC = REDUCE_FUNC>
+    struct xreducer_functors : public std::tuple<REDUCE_FUNC, INIT_FUNC, MERGE_FUNC>
+    {
+        using self_type = xreducer_functors<REDUCE_FUNC, INIT_FUNC, MERGE_FUNC>;
+        using base_type = std::tuple<REDUCE_FUNC, INIT_FUNC, MERGE_FUNC>;
+        using reduce_functor_type = REDUCE_FUNC;
+        using init_functor_type = INIT_FUNC;
+        using merge_functor_type = MERGE_FUNC;
+        using init_value_type = typename init_functor_type::value_type;
+
+        xreducer_functors()
+            : base_type()
+        {
+        }
+
+        // The reduce functor also serves as the merge functor here. It is
+        // deliberately passed as an lvalue (copied) into BOTH tuple slots:
+        // forwarding it into one slot while copying it into the other would be
+        // a use-after-move when RF is an rvalue, since the order in which the
+        // tuple members are constructed from the arguments is unspecified.
+        template <class RF>
+        xreducer_functors(RF&& reduce_func)
+            : base_type(reduce_func, INIT_FUNC(), reduce_func)
+        {
+        }
+
+        // Same reasoning as above for the duplicated reduce functor; init_func
+        // appears only once and can safely be forwarded.
+        template <class RF, class IF>
+        xreducer_functors(RF&& reduce_func, IF&& init_func)
+            : base_type(reduce_func, std::forward<IF>(init_func), reduce_func)
+        {
+        }
+
+        // All three functors are distinct arguments, so each can be forwarded.
+        template <class RF, class IF, class MF>
+        xreducer_functors(RF&& reduce_func, IF&& init_func, MF&& merge_func)
+            : base_type(std::forward<RF>(reduce_func), std::forward<IF>(init_func), std::forward<MF>(merge_func))
+        {
+        }
+
+        reduce_functor_type get_reduce() const
+        {
+            return std::get<0>(upcast());
+        }
+
+        init_functor_type get_init() const
+        {
+            return std::get<1>(upcast());
+        }
+
+        merge_functor_type get_merge() const
+        {
+            return std::get<2>(upcast());
+        }
+
+        template <class NT>
+        using rebind_t = xreducer_functors<REDUCE_FUNC, const_value<NT>, MERGE_FUNC>;
+
+        // Rebinds the init functor to produce values of type NT.
+        template <class NT>
+        rebind_t<NT> rebind()
+        {
+            return make_xreducer_functor(get_reduce(), get_init().template rebind<NT>(), get_merge());
+        }
+
+    private:
+
+        // Workaround for clang-cl
+        const base_type& upcast() const
+        {
+            return static_cast<const base_type&>(*this);
+        }
+    };
+
+    // Factory: wraps a reduce functor (init and merge default-deduced) into an
+    // xreducer_functors triple.
+    template <class RF>
+    auto make_xreducer_functor(RF&& reduce_func)
+    {
+        return xreducer_functors<std::remove_reference_t<RF>>(std::forward<RF>(reduce_func));
+    }
+
+    // Factory: wraps explicit reduce and init functors (merge defaults to reduce).
+    template <class RF, class IF>
+    auto make_xreducer_functor(RF&& reduce_func, IF&& init_func)
+    {
+        return xreducer_functors<std::remove_reference_t<RF>, std::remove_reference_t<IF>>(
+            std::forward<RF>(reduce_func),
+            std::forward<IF>(init_func)
+        );
+    }
+
+    // Factory: wraps explicit reduce, init and merge functors.
+    template <class RF, class IF, class MF>
+    auto make_xreducer_functor(RF&& reduce_func, IF&& init_func, MF&& merge_func)
+    {
+        return xreducer_functors<
+            std::remove_reference_t<RF>,
+            std::remove_reference_t<IF>,
+            std::remove_reference_t<MF>>(
+            std::forward<RF>(reduce_func),
+            std::forward<IF>(init_func),
+            std::forward<MF>(merge_func)
+        );
+    }
+
+    /**********************
+     * xreducer extension *
+     **********************/
+
+    namespace extension
+    {
+        // Extension hook: selects an extra base class for xreducer depending on
+        // the expression tag of CT. For the plain xtensor tag the base is empty.
+        template <class Tag, class F, class CT, class X, class O>
+        struct xreducer_base_impl;
+
+        template <class F, class CT, class X, class O>
+        struct xreducer_base_impl<xtensor_expression_tag, F, CT, X, O>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class F, class CT, class X, class O>
+        struct xreducer_base : xreducer_base_impl<xexpression_tag_t<CT>, F, CT, X, O>
+        {
+        };
+
+        template <class F, class CT, class X, class O>
+        using xreducer_base_t = typename xreducer_base<F, CT, X, O>::type;
+    }
+
+    /************
+     * xreducer *
+     ************/
+
+    template <class F, class CT, class X, class O>
+    class xreducer;
+
+    template <class F, class CT, class X, class O>
+    class xreducer_stepper;
+
+    // Iteration-related inner types for xreducer: the reduced shape and the
+    // (const-only) stepper type.
+    template <class F, class CT, class X, class O>
+    struct xiterable_inner_types<xreducer<F, CT, X, O>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using inner_shape_type = typename xreducer_shape_type<
+            typename xexpression_type::shape_type,
+            std::decay_t<X>,
+            typename O::keep_dims>::type;
+        using const_stepper = xreducer_stepper<F, CT, X, O>;
+        using stepper = const_stepper;
+    };
+
+    // Container-related inner types for xreducer. value_type is deduced from
+    // reduce(init(), *stepper) and, if that is itself an xexpression, replaced
+    // by its evaluated (container) type.
+    template <class F, class CT, class X, class O>
+    struct xcontainer_inner_types<xreducer<F, CT, X, O>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using reduce_functor_type = typename std::decay_t<F>::reduce_functor_type;
+        using init_functor_type = typename std::decay_t<F>::init_functor_type;
+        using merge_functor_type = typename std::decay_t<F>::merge_functor_type;
+        using substepper_type = typename xexpression_type::const_stepper;
+        using raw_value_type = std::decay_t<decltype(std::declval<reduce_functor_type>()(
+            std::declval<init_functor_type>()(),
+            *std::declval<substepper_type>()
+        ))>;
+        using value_type = typename detail::evaluated_value_type_t<raw_value_type, is_xexpression<raw_value_type>::value>;
+
+        // xreducer elements are computed on access, so references are by value.
+        using reference = value_type;
+        using const_reference = value_type;
+        using size_type = typename xexpression_type::size_type;
+    };
+
+    // Chooses the runtime dim-mapping container for a given shape type: a
+    // fixed_shape (compile-time constant) gets a mutable std::array of the same
+    // rank; any other shape type is used as-is.
+    template <class T>
+    struct select_dim_mapping_type
+    {
+        using type = T;
+    };
+
+    template <std::size_t... I>
+    struct select_dim_mapping_type<fixed_shape<I...>>
+    {
+        using type = std::array<std::size_t, sizeof...(I)>;
+    };
+
+    /**
+     * @class xreducer
+     * @brief Reducing function operating over specified axes.
+     *
+     * The xreducer class implements an \ref xexpression applying
+     * a reducing function to an \ref xexpression over the specified
+     * axes.
+     *
+     * @tparam F a tuple of functors (class \ref xreducer_functors or compatible)
+     * @tparam CT the closure type of the \ref xexpression to reduce
+     * @tparam X the list of axes
+     * @tparam O the reducer options type (keep_dims, initial value, ...)
+     *
+     * The reducer's result_type is deduced from the result type of function
+     * <tt>F::reduce_functor_type</tt> when called with elements of the expression @tparam CT.
+     *
+     * @sa reduce
+     */
+    template <class F, class CT, class X, class O>
+    class xreducer : public xsharable_expression<xreducer<F, CT, X, O>>,
+                     public xconst_iterable<xreducer<F, CT, X, O>>,
+                     public xaccessible<xreducer<F, CT, X, O>>,
+                     public extension::xreducer_base_t<F, CT, X, O>
+    {
+    public:
+
+        using self_type = xreducer<F, CT, X, O>;
+        using inner_types = xcontainer_inner_types<self_type>;
+
+        using reduce_functor_type = typename inner_types::reduce_functor_type;
+        using init_functor_type = typename inner_types::init_functor_type;
+        using merge_functor_type = typename inner_types::merge_functor_type;
+        using xreducer_functors_type = xreducer_functors<reduce_functor_type, init_functor_type, merge_functor_type>;
+
+        using xexpression_type = typename inner_types::xexpression_type;
+        using axes_type = X;
+
+        using extension_base = extension::xreducer_base_t<F, CT, X, O>;
+        using expression_tag = typename extension_base::expression_tag;
+
+        using substepper_type = typename inner_types::substepper_type;
+        using value_type = typename inner_types::value_type;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+        using pointer = value_type*;
+        using const_pointer = const value_type*;
+
+        using size_type = typename inner_types::size_type;
+        using difference_type = typename xexpression_type::difference_type;
+
+        using iterable_base = xconst_iterable<self_type>;
+        using inner_shape_type = typename iterable_base::inner_shape_type;
+        using shape_type = inner_shape_type;
+
+        using dim_mapping_type = typename select_dim_mapping_type<inner_shape_type>::type;
+
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+        using bool_load_type = typename xexpression_type::bool_load_type;
+
+        // Elements are computed lazily; the reducer has no fixed memory layout.
+        static constexpr layout_type static_layout = layout_type::dynamic;
+        static constexpr bool contiguous_layout = false;
+
+        template <class Func, class CTA, class AX, class OX>
+        xreducer(Func&& func, CTA&& e, AX&& axes, OX&& options);
+
+        const inner_shape_type& shape() const noexcept;
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        const xexpression_type& expression() const noexcept;
+
+        template <class S>
+        bool broadcast_shape(S& shape, bool reuse_cache = false) const;
+
+        template <class S>
+        bool has_linear_assign(const S& strides) const noexcept;
+
+        template <class S>
+        const_stepper stepper_begin(const S& shape) const noexcept;
+        template <class S>
+        const_stepper stepper_end(const S& shape, layout_type) const noexcept;
+
+        // Rebind helpers: build an equivalent reducer over another expression
+        // (and optionally other functors/options).
+        template <class E, class Func = F, class Opts = O>
+        using rebind_t = xreducer<Func, E, X, Opts>;
+
+        template <class E>
+        rebind_t<E> build_reducer(E&& e) const;
+
+        template <class E, class Func, class Opts>
+        rebind_t<E, Func, Opts> build_reducer(E&& e, Func&& func, Opts&& opts) const;
+
+        xreducer_functors_type functors() const
+        {
+            return xreducer_functors_type(m_reduce, m_init, m_merge);  // TODO: understand why
+                                                                       // make_xreducer_functor is throwing an
+                                                                       // error
+        }
+
+        const O& options() const
+        {
+            return m_options;
+        }
+
+    private:
+
+        CT m_e;
+        reduce_functor_type m_reduce;
+        init_functor_type m_init;
+        merge_functor_type m_merge;
+        axes_type m_axes;
+        inner_shape_type m_shape;
+        dim_mapping_type m_dim_mapping;
+        O m_options;
+
+        friend class xreducer_stepper<F, CT, X, O>;
+    };
+
+    /*************************
+     * reduce implementation *
+     *************************/
+
+    namespace detail
+    {
+        // Lazy strategy: normalizes the axes and wraps everything into an
+        // xreducer expression; no computation happens here.
+        template <class F, class E, class X, class O>
+        inline auto reduce_impl(F&& f, E&& e, X&& axes, evaluation_strategy::lazy_type, O&& options)
+        {
+            decltype(auto) normalized_axes = normalize_axis(e, std::forward<X>(axes));
+
+            using reduce_functor_type = typename std::decay_t<F>::reduce_functor_type;
+            using init_functor_type = typename std::decay_t<F>::init_functor_type;
+            using value_type = std::decay_t<decltype(std::declval<reduce_functor_type>()(
+                std::declval<init_functor_type>()(),
+                *std::declval<typename std::decay_t<E>::const_stepper>()
+            ))>;
+            using evaluated_value_type = evaluated_value_type_t<value_type, is_xexpression<value_type>::value>;
+
+            using reducer_type = xreducer<
+                F,
+                const_xclosure_t<E>,
+                xtl::const_closure_type_t<decltype(normalized_axes)>,
+                reducer_options<evaluated_value_type, std::decay_t<O>>>;
+            return reducer_type(
+                std::forward<F>(f),
+                std::forward<E>(e),
+                std::forward<decltype(normalized_axes)>(normalized_axes),
+                std::forward<O>(options)
+            );
+        }
+
+        // Immediate strategy: evaluates the expression, then performs the
+        // reduction eagerly via reduce_immediate.
+        template <class F, class E, class X, class O>
+        inline auto reduce_impl(F&& f, E&& e, X&& axes, evaluation_strategy::immediate_type, O&& options)
+        {
+            decltype(auto) normalized_axes = normalize_axis(e, std::forward<X>(axes));
+            return reduce_immediate(
+                std::forward<F>(f),
+                eval(std::forward<E>(e)),
+                std::forward<decltype(normalized_axes)>(normalized_axes),
+                std::forward<O>(options)
+            );
+        }
+    }
+
+#define DEFAULT_STRATEGY_REDUCERS std::tuple<evaluation_strategy::lazy_type>
+
+    namespace detail
+    {
+        // Trait: true when T (decayed) is an xreducer_functors instantiation.
+        // Used to dispatch between the reduce() overloads below.
+        template <class T>
+        struct is_xreducer_functors_impl : std::false_type
+        {
+        };
+
+        template <class RF, class IF, class MF>
+        struct is_xreducer_functors_impl<xreducer_functors<RF, IF, MF>> : std::true_type
+        {
+        };
+
+        template <class T>
+        using is_xreducer_functors = is_xreducer_functors_impl<std::decay_t<T>>;
+    }
+
+    /**
+     * @brief Returns an \ref xexpression applying the specified reducing
+     * function to an expression over the given axes.
+     *
+     * @param f the reducing function to apply.
+     * @param e the \ref xexpression to reduce.
+     * @param axes the list of axes.
+     * @param options evaluation strategy to use (lazy (default), or immediate)
+     *
+     * The returned expression either hold a const reference to \p e or a copy
+     * depending on whether \p e is an lvalue or an rvalue.
+     */
+
+    // Overload taken when f is already an xreducer_functors bundle.
+    template <
+        class F,
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, detail::is_xreducer_functors<F>)>
+    inline auto reduce(F&& f, E&& e, X&& axes, EVS&& options = EVS())
+    {
+        return detail::reduce_impl(
+            std::forward<F>(f),
+            std::forward<E>(e),
+            std::forward<X>(axes),
+            typename reducer_options<int, EVS>::evaluation_strategy{},
+            std::forward<EVS>(options)
+        );
+    }
+
+    // Overload for a bare functor: wraps it into xreducer_functors first.
+    template <
+        class F,
+        class E,
+        class X,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<is_reducer_options<X>>, xtl::negation<detail::is_xreducer_functors<F>>)>
+    inline auto reduce(F&& f, E&& e, X&& axes, EVS&& options = EVS())
+    {
+        return reduce(
+            make_xreducer_functor(std::forward<F>(f)),
+            std::forward<E>(e),
+            std::forward<X>(axes),
+            std::forward<EVS>(options)
+        );
+    }
+
+    // No-axes overload: reduces over ALL dimensions (axes 0..dim-1 are filled
+    // in via std::iota). Requires f to be an xreducer_functors bundle.
+    template <
+        class F,
+        class E,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(is_reducer_options<EVS>, detail::is_xreducer_functors<F>)>
+    inline auto reduce(F&& f, E&& e, EVS&& options = EVS())
+    {
+        xindex_type_t<typename std::decay_t<E>::shape_type> ar;
+        resize_container(ar, e.dimension());
+        std::iota(ar.begin(), ar.end(), 0);
+        return detail::reduce_impl(
+            std::forward<F>(f),
+            std::forward<E>(e),
+            std::move(ar),
+            typename reducer_options<int, std::decay_t<EVS>>::evaluation_strategy{},
+            std::forward<EVS>(options)
+        );
+    }
+
+    // No-axes overload for a bare functor: wraps it and delegates to the above.
+    template <
+        class F,
+        class E,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(is_reducer_options<EVS>, xtl::negation<detail::is_xreducer_functors<F>>)>
+    inline auto reduce(F&& f, E&& e, EVS&& options = EVS())
+    {
+        return reduce(make_xreducer_functor(std::forward<F>(f)), std::forward<E>(e), std::forward<EVS>(options));
+    }
+
+    // Overload taking the axes as a braced initializer / C array, e.g.
+    // reduce(f, e, {0, 2}); axes are normalized into a std::array.
+    template <
+        class F,
+        class E,
+        class I,
+        std::size_t N,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(detail::is_xreducer_functors<F>)>
+    inline auto reduce(F&& f, E&& e, const I (&axes)[N], EVS options = EVS())
+    {
+        using axes_type = std::array<std::size_t, N>;
+        auto ax = xt::forward_normalize<axes_type>(e, axes);
+        return detail::reduce_impl(
+            std::forward<F>(f),
+            std::forward<E>(e),
+            std::move(ax),
+            typename reducer_options<int, EVS>::evaluation_strategy{},
+            options
+        );
+    }
+
+    // Same as above for a bare functor: wraps it and delegates.
+    template <
+        class F,
+        class E,
+        class I,
+        std::size_t N,
+        class EVS = DEFAULT_STRATEGY_REDUCERS,
+        XTL_REQUIRES(xtl::negation<detail::is_xreducer_functors<F>>)>
+    inline auto reduce(F&& f, E&& e, const I (&axes)[N], EVS options = EVS())
+    {
+        return reduce(make_xreducer_functor(std::forward<F>(f)), std::forward<E>(e), axes, options);
+    }
+
+    /********************
+     * xreducer_stepper *
+     ********************/
+
+    // Stepper over an xreducer: dereferencing aggregates the underlying
+    // expression over the reduced axes on the fly via the sub-expression's
+    // stepper (definitions are out of view of this declaration).
+    template <class F, class CT, class X, class O>
+    class xreducer_stepper
+    {
+    public:
+
+        using self_type = xreducer_stepper<F, CT, X, O>;
+        using xreducer_type = xreducer<F, CT, X, O>;
+
+        using value_type = typename xreducer_type::value_type;
+        using reference = typename xreducer_type::value_type;
+        using pointer = typename xreducer_type::const_pointer;
+        using size_type = typename xreducer_type::size_type;
+        using difference_type = typename xreducer_type::difference_type;
+
+        using xexpression_type = typename xreducer_type::xexpression_type;
+        using substepper_type = typename xexpression_type::const_stepper;
+        using shape_type = typename xreducer_type::shape_type;
+
+        xreducer_stepper(
+            const xreducer_type& red,
+            size_type offset,
+            bool end = false,
+            layout_type l = default_assignable_layout(xexpression_type::static_layout)
+        );
+
+        // Computes the reduced value at the current position.
+        reference operator*() const;
+
+        void step(size_type dim);
+        void step_back(size_type dim);
+        void step(size_type dim, size_type n);
+        void step_back(size_type dim, size_type n);
+        void reset(size_type dim);
+        void reset_back(size_type dim);
+
+        void to_begin();
+        void to_end(layout_type l);
+
+    private:
+
+        reference initial_value() const;
+        reference aggregate(size_type dim) const;
+        reference aggregate_impl(size_type dim, /*keep_dims=*/std::false_type) const;
+        reference aggregate_impl(size_type dim, /*keep_dims=*/std::true_type) const;
+
+        substepper_type get_substepper_begin() const;
+        size_type get_dim(size_type dim) const noexcept;
+        size_type shape(size_type i) const noexcept;
+        size_type axis(size_type i) const noexcept;
+
+        const xreducer_type* m_reducer;
+        size_type m_offset;
+        // mutable: advanced/rewound while aggregating from const operator*.
+        mutable substepper_type m_stepper;
+    };
+
+    /******************
+     * xreducer utils *
+     ******************/
+
+    namespace detail
+    {
+        // Compile-time membership test: true when X appears in the pack I...
+        template <std::size_t X, std::size_t... I>
+        struct in
+        {
+            static constexpr bool value = xtl::disjunction<std::integral_constant<bool, X == I>...>::value;
+        };
+
+        // Builds the reduced fixed_shape by walking dimensions from index Z
+        // down to 0, keeping every dimension of I... whose index is not in the
+        // reduced-axes pack J..., accumulating the result in R...
+        template <std::size_t Z, class S1, class S2, class R>
+        struct fixed_xreducer_shape_type_impl;
+
+        template <std::size_t Z, std::size_t... I, std::size_t... J, std::size_t... R>
+        struct fixed_xreducer_shape_type_impl<Z, fixed_shape<I...>, fixed_shape<J...>, fixed_shape<R...>>
+        {
+            using type = std::conditional_t<
+                in<Z, J...>::value,
+                typename fixed_xreducer_shape_type_impl<Z - 1, fixed_shape<I...>, fixed_shape<J...>, fixed_shape<R...>>::type,
+                typename fixed_xreducer_shape_type_impl<
+                    Z - 1,
+                    fixed_shape<I...>,
+                    fixed_shape<J...>,
+                    fixed_shape<detail::at<Z, I...>::value, R...>>::type>;
+        };
+
+        // Recursion terminus: dimension 0.
+        template <std::size_t... I, std::size_t... J, std::size_t... R>
+        struct fixed_xreducer_shape_type_impl<0, fixed_shape<I...>, fixed_shape<J...>, fixed_shape<R...>>
+        {
+            using type = std::
+                conditional_t<in<0, J...>::value, fixed_shape<R...>, fixed_shape<detail::at<0, I...>::value, R...>>;
+        };
+
+        /***************************
+         * helper for return types *
+         ***************************/
+
+        template <class T>
+        struct xreducer_size_type
+        {
+            using type = std::size_t;
+        };
+
+        template <class T>
+        using xreducer_size_type_t = typename xreducer_size_type<T>::type;
+
+        template <class T>
+        struct xreducer_temporary_type
+        {
+            using type = T;
+        };
+
+        template <class T>
+        using xreducer_temporary_type_t = typename xreducer_temporary_type<T>::type;
+
+        /********************************
+         * Default const_value rebinder *
+         ********************************/
+
+        // Converts a const_value<T> into a const_value<U> holding the same value.
+        template <class T, class U>
+        struct const_value_rebinder
+        {
+            static const_value<U> run(const const_value<T>& t)
+            {
+                return const_value<U>(t.m_value);
+            }
+        };
+    }
+
+    /*******************************************
+     * Init functor const_value implementation *
+     *******************************************/
+
+    // Returns a const_value initializer holding this value converted to NT.
+    template <class T>
+    template <class NT>
+    const_value<NT> const_value<T>::rebind() const
+    {
+        return detail::const_value_rebinder<T, NT>::run(*this);
+    }
+
+    /*****************************
+     * fixed_xreducer_shape_type *
+     *****************************/
+
+    // Computes the fixed_shape obtained by reducing shape S1 over the axes
+    // listed in S2; starts the recursion at the last dimension index.
+    template <class S1, class S2>
+    struct fixed_xreducer_shape_type;
+
+    template <std::size_t... I, std::size_t... J>
+    struct fixed_xreducer_shape_type<fixed_shape<I...>, fixed_shape<J...>>
+    {
+        using type = typename detail::
+            fixed_xreducer_shape_type_impl<sizeof...(I) - 1, fixed_shape<I...>, fixed_shape<J...>, fixed_shape<>>::type;
+    };
+
+    // meta-function returning the shape type for an xreducer
+    // ST: shape type of the input expression, X: axes container type,
+    // O: std::true_type when keep_dims is requested, std::false_type otherwise.
+    template <class ST, class X, class O>
+    struct xreducer_shape_type
+    {
+        using type = promote_shape_t<ST, std::decay_t<X>>;
+    };
+
+    // keep_dims: the result keeps the input rank N1.
+    template <class I1, std::size_t N1, class I2, std::size_t N2>
+    struct xreducer_shape_type<std::array<I1, N1>, std::array<I2, N2>, std::true_type>
+    {
+        using type = std::array<I2, N1>;
+    };
+
+    // no keep_dims: each of the N2 reduced axes removes one dimension.
+    template <class I1, std::size_t N1, class I2, std::size_t N2>
+    struct xreducer_shape_type<std::array<I1, N1>, std::array<I2, N2>, std::false_type>
+    {
+        using type = std::array<I2, N1 - N2>;
+    };
+
+    // fixed input shape, runtime axes, no keep_dims: the extents are only known
+    // at runtime, so the result is a std::array (fixed_shape<> if fully reduced).
+    template <std::size_t... I, class I2, std::size_t N2>
+    struct xreducer_shape_type<fixed_shape<I...>, std::array<I2, N2>, std::false_type>
+    {
+        using type = std::conditional_t<sizeof...(I) == N2, fixed_shape<>, std::array<I2, sizeof...(I) - N2>>;
+    };
+
+    namespace detail
+    {
+        template <class S1, class S2>
+        struct ixconcat;
+
+        template <class T, T... I1, T... I2>
+        struct ixconcat<std::integer_sequence<T, I1...>, std::integer_sequence<T, I2...>>
+        {
+            using type = std::integer_sequence<T, I1..., I2...>;
+        };
+
+        template <class T, T X, std::size_t N>
+        struct repeat_integer_sequence
+        {
+            using type = typename ixconcat<
+                std::integer_sequence<T, X>,
+                typename repeat_integer_sequence<T, X, N - 1>::type>::type;
+        };
+
+        template <class T, T X>
+        struct repeat_integer_sequence<T, X, 0>
+        {
+            using type = std::integer_sequence<T>;
+        };
+
+        template <class T, T X>
+        struct repeat_integer_sequence<T, X, 2>
+        {
+            using type = std::integer_sequence<T, X, X>;
+        };
+
+        template <class T, T X>
+        struct repeat_integer_sequence<T, X, 1>
+        {
+            using type = std::integer_sequence<T, X>;
+        };
+    }
+
+    // fixed input shape, runtime axes, keep_dims: when every axis is reduced the
+    // shape is fully known at compile time (fixed_shape<1, 1, ..., 1> of rank N2);
+    // otherwise the surviving extents are runtime values and a std::array is used.
+    template <std::size_t... I, class I2, std::size_t N2>
+    struct xreducer_shape_type<fixed_shape<I...>, std::array<I2, N2>, std::true_type>
+    {
+        template <std::size_t... X>
+        static constexpr auto get_type(std::index_sequence<X...>)
+        {
+            return fixed_shape<X...>{};
+        }
+
+        // if all axes reduced
+        using type = std::conditional_t<
+            sizeof...(I) == N2,
+            decltype(get_type(typename detail::repeat_integer_sequence<std::size_t, std::size_t(1), N2>::type{})),
+            std::array<I2, sizeof...(I)>>;
+    };
+
+    // Both shapes fixed at compile time: delegate to fixed_xreducer_shape_type.
+    // NOTE(review): the keep_dims flag O is ignored here — the reduced axes are
+    // always dropped from the type; confirm this is intended for fully fixed shapes.
+    template <std::size_t... I, std::size_t... J, class O>
+    struct xreducer_shape_type<fixed_shape<I...>, fixed_shape<J...>, O>
+    {
+        using type = typename fixed_xreducer_shape_type<fixed_shape<I...>, fixed_shape<J...>>::type;
+    };
+
+    namespace detail
+    {
+        template <class S, class E, class X, class M>
+        inline void shape_and_mapping_computation(S& shape, E& e, const X& axes, M& mapping, std::false_type)
+        {
+            auto first = e.shape().begin();
+            auto last = e.shape().end();
+            auto exclude_it = axes.begin();
+
+            using value_type = typename S::value_type;
+            using difference_type = typename S::difference_type;
+            auto d_first = shape.begin();
+            auto map_first = mapping.begin();
+
+            auto iter = first;
+            while (iter != last && exclude_it != axes.end())
+            {
+                auto diff = std::distance(first, iter);
+                if (diff != difference_type(*exclude_it))
+                {
+                    *d_first++ = *iter++;
+                    *map_first++ = value_type(diff);
+                }
+                else
+                {
+                    ++iter;
+                    ++exclude_it;
+                }
+            }
+
+            auto diff = std::distance(first, iter);
+            auto end = std::distance(iter, last);
+            std::iota(map_first, map_first + end, diff);
+            std::copy(iter, last, d_first);
+        }
+
+        template <class S, class E, class X, class M>
+        inline void
+        shape_and_mapping_computation_keep_dim(S& shape, E& e, const X& axes, M& mapping, std::false_type)
+        {
+            for (std::size_t i = 0; i < e.dimension(); ++i)
+            {
+                if (std::find(axes.cbegin(), axes.cend(), i) == axes.cend())
+                {
+                    // i not in axes!
+                    shape[i] = e.shape()[i];
+                }
+                else
+                {
+                    shape[i] = 1;
+                }
+            }
+            std::iota(mapping.begin(), mapping.end(), 0);
+        }
+
+        template <class S, class E, class X, class M>
+        inline void shape_and_mapping_computation(S&, E&, const X&, M&, std::true_type)
+        {
+        }
+
+        template <class S, class E, class X, class M>
+        inline void shape_and_mapping_computation_keep_dim(S&, E&, const X&, M&, std::true_type)
+        {
+        }
+    }
+
+    /***************************
+     * xreducer implementation *
+     ***************************/
+
+    /**
+     * @name Constructor
+     */
+    //@{
+    /**
+     * Constructs an xreducer expression applying the specified
+     * function to the given expression over the given axes.
+     *
+     * @param func the function to apply (a tuple of reduce, init and merge functors)
+     * @param e the expression to reduce
+     * @param axes the axes along which the reduction is performed
+     * @param options the reduction options (keep_dims, initial value, ...)
+     *
+     * @throws std::runtime_error if the axes are not sorted, contain
+     * duplicates, or exceed the dimension of the expression.
+     */
+    template <class F, class CT, class X, class O>
+    template <class Func, class CTA, class AX, class OX>
+    inline xreducer<F, CT, X, O>::xreducer(Func&& func, CTA&& e, AX&& axes, OX&& options)
+        : m_e(std::forward<CTA>(e))
+        , m_reduce(xt::get<0>(func))
+        , m_init(xt::get<1>(func))
+        , m_merge(xt::get<2>(func))
+        , m_axes(std::forward<AX>(axes))
+        // with keep_dims the result keeps the input rank; otherwise each
+        // reduced axis removes one dimension
+        , m_shape(xtl::make_sequence<inner_shape_type>(
+              typename O::keep_dims() ? m_e.dimension() : m_e.dimension() - m_axes.size(),
+              0
+          ))
+        , m_dim_mapping(xtl::make_sequence<dim_mapping_type>(
+              typename O::keep_dims() ? m_e.dimension() : m_e.dimension() - m_axes.size(),
+              0
+          ))
+        , m_options(std::forward<OX>(options))
+    {
+        // std::less is used, because as the standard says (24.4.5):
+        // A sequence is sorted with respect to a comparator comp if for any iterator i pointing to the
+        // sequence and any non-negative integer n such that i + n is a valid iterator pointing to an element
+        // of the sequence, comp(*(i + n), *i) == false. Therefore less is required to detect duplicates.
+        if (!std::is_sorted(m_axes.cbegin(), m_axes.cend(), std::less<>()))
+        {
+            XTENSOR_THROW(std::runtime_error, "Reducing axes should be sorted.");
+        }
+        if (std::adjacent_find(m_axes.cbegin(), m_axes.cend()) != m_axes.cend())
+        {
+            XTENSOR_THROW(std::runtime_error, "Reducing axes should not contain duplicates.");
+        }
+        // axes are sorted, so checking the last one suffices for the bound check
+        if (m_axes.size() != 0 && m_axes[m_axes.size() - 1] > m_e.dimension() - 1)
+        {
+            XTENSOR_THROW(
+                std::runtime_error,
+                "Axis " + std::to_string(m_axes[m_axes.size() - 1]) + " out of bounds for reduction."
+            );
+        }
+
+        // fill m_shape and m_dim_mapping (no-op for compile-time fixed shapes)
+        if (!typename O::keep_dims())
+        {
+            detail::shape_and_mapping_computation(
+                m_shape,
+                m_e,
+                m_axes,
+                m_dim_mapping,
+                detail::is_fixed<shape_type>{}
+            );
+        }
+        else
+        {
+            detail::shape_and_mapping_computation_keep_dim(
+                m_shape,
+                m_e,
+                m_axes,
+                m_dim_mapping,
+                detail::is_fixed<shape_type>{}
+            );
+        }
+    }
+
+    //@}
+
+    /**
+     * @name Size and shape
+     */
+    //@{
+    /**
+     * Returns the shape of the expression.
+     */
+    template <class F, class CT, class X, class O>
+    inline auto xreducer<F, CT, X, O>::shape() const noexcept -> const inner_shape_type&
+    {
+        return m_shape;
+    }
+
+    /**
+     * Returns the layout of the expression.
+     */
+    template <class F, class CT, class X, class O>
+    inline layout_type xreducer<F, CT, X, O>::layout() const noexcept
+    {
+        return static_layout;
+    }
+
+    /**
+     * Returns false: a reducer computes its elements on the fly and is
+     * never backed by contiguous storage.
+     */
+    template <class F, class CT, class X, class O>
+    inline bool xreducer<F, CT, X, O>::is_contiguous() const noexcept
+    {
+        return false;
+    }
+
+    //@}
+
+    /**
+     * @name Data
+     */
+    //@{
+    /**
+     * Returns a constant reference to the element at the specified position in the reducer.
+     * @param args a list of indices specifying the position in the reducer. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the reducer.
+     */
+    template <class F, class CT, class X, class O>
+    template <class... Args>
+    inline auto xreducer<F, CT, X, O>::operator()(Args... args) const -> const_reference
+    {
+        XTENSOR_TRY(check_index(shape(), args...));
+        XTENSOR_CHECK_DIMENSION(shape(), args...);
+        std::array<std::size_t, sizeof...(Args)> arg_array = {{static_cast<std::size_t>(args)...}};
+        return element(arg_array.cbegin(), arg_array.cend());
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the reducer.
+     * @param args a list of indices specifying the position in the reducer. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the reducer, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class F, class CT, class X, class O>
+    template <class... Args>
+    inline auto xreducer<F, CT, X, O>::unchecked(Args... args) const -> const_reference
+    {
+        std::array<std::size_t, sizeof...(Args)> arg_array = {{static_cast<std::size_t>(args)...}};
+        return element(arg_array.cbegin(), arg_array.cend());
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the reducer.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater
+     * than the number of dimensions of the reducer.
+     * If fewer indices than dimensions are provided, the missing leading
+     * indices are treated as 0; extra leading indices are dropped.
+     * NOTE(review): `first - size` below performs iterator arithmetic, so It
+     * must be a random-access iterator — confirm callers only pass such iterators.
+     */
+    template <class F, class CT, class X, class O>
+    template <class It>
+    inline auto xreducer<F, CT, X, O>::element(It first, It last) const -> const_reference
+    {
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        auto stepper = const_stepper(*this, 0);
+        if (first != last)
+        {
+            size_type dim = 0;
+            // drop left most elements
+            auto size = std::ptrdiff_t(this->dimension()) - std::distance(first, last);
+            auto begin = first - size;
+            while (begin != last)
+            {
+                if (begin < first)
+                {
+                    // missing leading index: stay at position 0 in this dimension
+                    stepper.step(dim++, std::size_t(0));
+                    begin++;
+                }
+                else
+                {
+                    stepper.step(dim++, std::size_t(*begin++));
+                }
+            }
+        }
+        return *stepper;
+    }
+
+    /**
+     * Returns a constant reference to the underlying expression of the reducer.
+     */
+    template <class F, class CT, class X, class O>
+    inline auto xreducer<F, CT, X, O>::expression() const noexcept -> const xexpression_type&
+    {
+        return m_e;
+    }
+
+    //@}
+
+    /**
+     * @name Broadcasting
+     */
+    //@{
+    /**
+     * Broadcast the shape of the reducer to the specified parameter.
+     * @param shape the result shape
+     * @param reuse_cache parameter for internal optimization
+     * @return a boolean indicating whether the broadcasting is trivial
+     */
+    template <class F, class CT, class X, class O>
+    template <class S>
+    inline bool xreducer<F, CT, X, O>::broadcast_shape(S& shape, bool) const
+    {
+        return xt::broadcast_shape(m_shape, shape);
+    }
+
+    /**
+     * Checks whether the xreducer can be linearly assigned to an expression
+     * with the specified strides.
+     * @return a boolean indicating whether a linear assign is possible;
+     * always false for a reducer, whose elements are computed stepper-wise.
+     */
+    template <class F, class CT, class X, class O>
+    template <class S>
+    inline bool xreducer<F, CT, X, O>::has_linear_assign(const S& /*strides*/) const noexcept
+    {
+        return false;
+    }
+
+    //@}
+
+    // Returns a stepper at the beginning of a broadcast iteration over `shape`;
+    // `offset` counts the extra leading dimensions added by broadcasting.
+    template <class F, class CT, class X, class O>
+    template <class S>
+    inline auto xreducer<F, CT, X, O>::stepper_begin(const S& shape) const noexcept -> const_stepper
+    {
+        size_type offset = shape.size() - this->dimension();
+        return const_stepper(*this, offset);
+    }
+
+    // Returns a past-the-end stepper for the layout `l`.
+    template <class F, class CT, class X, class O>
+    template <class S>
+    inline auto xreducer<F, CT, X, O>::stepper_end(const S& shape, layout_type l) const noexcept
+        -> const_stepper
+    {
+        size_type offset = shape.size() - this->dimension();
+        return const_stepper(*this, offset, true, l);
+    }
+
+    // Rebinds this reducer onto a new expression, keeping the same functors,
+    // axes and options.
+    template <class F, class CT, class X, class O>
+    template <class E>
+    inline auto xreducer<F, CT, X, O>::build_reducer(E&& e) const -> rebind_t<E>
+    {
+        return rebind_t<E>(
+            std::make_tuple(m_reduce, m_init, m_merge),
+            std::forward<E>(e),
+            axes_type(m_axes),
+            m_options
+        );
+    }
+
+    // Rebinds this reducer onto a new expression with different functors and
+    // options, keeping only the axes.
+    template <class F, class CT, class X, class O>
+    template <class E, class Func, class Opts>
+    inline auto xreducer<F, CT, X, O>::build_reducer(E&& e, Func&& func, Opts&& opts) const
+        -> rebind_t<E, Func, Opts>
+    {
+        return rebind_t<E, Func, Opts>(
+            std::forward<Func>(func),
+            std::forward<E>(e),
+            axes_type(m_axes),
+            std::forward<Opts>(opts)
+        );
+    }
+
+    /***********************************
+     * xreducer_stepper implementation *
+     ***********************************/
+
+    // Builds a stepper over the reducer. `offset` is the number of extra
+    // leading (broadcast) dimensions; when `end` is true the stepper is
+    // positioned past-the-end according to layout `l`.
+    template <class F, class CT, class X, class O>
+    inline xreducer_stepper<F, CT, X, O>::xreducer_stepper(
+        const xreducer_type& red,
+        size_type offset,
+        bool end,
+        layout_type l
+    )
+        : m_reducer(&red)
+        , m_offset(offset)
+        , m_stepper(get_substepper_begin())
+    {
+        if (end)
+        {
+            to_end(l);
+        }
+    }
+
+    // Dereferencing performs the actual reduction at the current position.
+    template <class F, class CT, class X, class O>
+    inline auto xreducer_stepper<F, CT, X, O>::operator*() const -> reference
+    {
+        reference r = aggregate(0);
+        return r;
+    }
+
+    // Steps in the result's coordinate system; dimensions below m_offset are
+    // broadcast dimensions and are ignored, others are translated to input
+    // dimensions through the dimension mapping.
+    template <class F, class CT, class X, class O>
+    inline void xreducer_stepper<F, CT, X, O>::step(size_type dim)
+    {
+        if (dim >= m_offset)
+        {
+            m_stepper.step(get_dim(dim - m_offset));
+        }
+    }
+
+    template <class F, class CT, class X, class O>
+    inline void xreducer_stepper<F, CT, X, O>::step_back(size_type dim)
+    {
+        if (dim >= m_offset)
+        {
+            m_stepper.step_back(get_dim(dim - m_offset));
+        }
+    }
+
+    template <class F, class CT, class X, class O>
+    inline void xreducer_stepper<F, CT, X, O>::step(size_type dim, size_type n)
+    {
+        if (dim >= m_offset)
+        {
+            m_stepper.step(get_dim(dim - m_offset), n);
+        }
+    }
+
+    template <class F, class CT, class X, class O>
+    inline void xreducer_stepper<F, CT, X, O>::step_back(size_type dim, size_type n)
+    {
+        if (dim >= m_offset)
+        {
+            m_stepper.step_back(get_dim(dim - m_offset), n);
+        }
+    }
+
+    template <class F, class CT, class X, class O>
+    inline void xreducer_stepper<F, CT, X, O>::reset(size_type dim)
+    {
+        if (dim >= m_offset)
+        {
+            // Because the reducer uses `reset` to reset the non-reducing axes,
+            // we need to prevent that here for the keep_dims case, where a
+            // reducing axis (extent 1 in the result) must not be reset.
+            if (typename O::keep_dims()
+                && std::binary_search(m_reducer->m_axes.begin(), m_reducer->m_axes.end(), dim - m_offset))
+            {
+                // If keep dim activated, and dim is in the axes, do nothing!
+                return;
+            }
+            m_stepper.reset(get_dim(dim - m_offset));
+        }
+    }
+
+    template <class F, class CT, class X, class O>
+    inline void xreducer_stepper<F, CT, X, O>::reset_back(size_type dim)
+    {
+        if (dim >= m_offset)
+        {
+            // Note that for *not* KD this is not going to do anything
+            if (typename O::keep_dims()
+                && std::binary_search(m_reducer->m_axes.begin(), m_reducer->m_axes.end(), dim - m_offset))
+            {
+                // If keep dim activated, and dim is in the axes, do nothing!
+                return;
+            }
+            m_stepper.reset_back(get_dim(dim - m_offset));
+        }
+    }
+
+    template <class F, class CT, class X, class O>
+    inline void xreducer_stepper<F, CT, X, O>::to_begin()
+    {
+        m_stepper.to_begin();
+    }
+
+    template <class F, class CT, class X, class O>
+    inline void xreducer_stepper<F, CT, X, O>::to_end(layout_type l)
+    {
+        m_stepper.to_end(l);
+    }
+
+    // Returns the seed of the reduction: the user-supplied initial value if the
+    // options carry one, otherwise the value produced by the init functor.
+    template <class F, class CT, class X, class O>
+    inline auto xreducer_stepper<F, CT, X, O>::initial_value() const -> reference
+    {
+        return O::has_initial_value ? m_reducer->m_options.initial_value
+                                    : static_cast<reference>(m_reducer->m_init());
+    }
+
+    // Performs the reduction for the element the stepper currently points to,
+    // folding over all reducing axes starting at `dim` (an index into m_axes).
+    template <class F, class CT, class X, class O>
+    inline auto xreducer_stepper<F, CT, X, O>::aggregate(size_type dim) const -> reference
+    {
+        reference res;
+        if (m_reducer->m_e.size() == size_type(0))
+        {
+            // empty input: the result is just the seed
+            res = initial_value();
+        }
+        else if (m_reducer->m_e.shape().empty() || m_reducer->m_axes.size() == 0)
+        {
+            // 0-d input or no axes: reduce the single current element into the seed
+            res = m_reducer->m_reduce(initial_value(), *m_stepper);
+        }
+        else
+        {
+            res = aggregate_impl(dim, typename O::keep_dims());
+            // the user-provided initial value is merged once, at the outermost level
+            if (O::has_initial_value && dim == 0)
+            {
+                res = m_reducer->m_merge(m_reducer->m_options.initial_value, res);
+            }
+        }
+        return res;
+    }
+
+    // Recursive fold over the reducing axes (no keep_dims): `dim` indexes into
+    // m_axes. Inner levels merge partial results with m_merge; the innermost
+    // axis folds raw elements with m_reduce. The stepper is rewound after each
+    // axis sweep so outer levels see an unchanged position.
+    template <class F, class CT, class X, class O>
+    inline auto xreducer_stepper<F, CT, X, O>::aggregate_impl(size_type dim, std::false_type) const -> reference
+    {
+        // reference can be std::array, hence the {} initializer
+        reference res = {};
+        size_type index = axis(dim);
+        size_type size = shape(index);
+        if (dim != m_reducer->m_axes.size() - 1)
+        {
+            res = aggregate_impl(dim + 1, typename O::keep_dims());
+            for (size_type i = 1; i != size; ++i)
+            {
+                m_stepper.step(index);
+                res = m_reducer->m_merge(res, aggregate_impl(dim + 1, typename O::keep_dims()));
+            }
+        }
+        else
+        {
+            res = m_reducer->m_reduce(static_cast<reference>(m_reducer->m_init()), *m_stepper);
+            for (size_type i = 1; i != size; ++i)
+            {
+                m_stepper.step(index);
+                res = m_reducer->m_reduce(res, *m_stepper);
+            }
+        }
+        m_stepper.reset(index);
+        return res;
+    }
+
+    // keep_dims variant: `dim` walks the actual input dimensions; non-reducing
+    // dimensions are skipped by recursing, reducing dimensions are folded as in
+    // the variant above.
+    template <class F, class CT, class X, class O>
+    inline auto xreducer_stepper<F, CT, X, O>::aggregate_impl(size_type dim, std::true_type) const -> reference
+    {
+        // reference can be std::array, hence the {} initializer
+        reference res = {};
+        auto ax_it = std::find(m_reducer->m_axes.begin(), m_reducer->m_axes.end(), dim);
+        if (ax_it != m_reducer->m_axes.end())
+        {
+            size_type index = dim;
+            size_type size = m_reducer->m_e.shape()[index];
+            if (ax_it != m_reducer->m_axes.end() - 1 && size != 0)
+            {
+                res = aggregate_impl(dim + 1, typename O::keep_dims());
+                for (size_type i = 1; i != size; ++i)
+                {
+                    m_stepper.step(index);
+                    res = m_reducer->m_merge(res, aggregate_impl(dim + 1, typename O::keep_dims()));
+                }
+            }
+            else
+            {
+                res = m_reducer->m_reduce(static_cast<reference>(m_reducer->m_init()), *m_stepper);
+                for (size_type i = 1; i != size; ++i)
+                {
+                    m_stepper.step(index);
+                    res = m_reducer->m_reduce(res, *m_stepper);
+                }
+            }
+            m_stepper.reset(index);
+        }
+        else
+        {
+            if (dim < m_reducer->m_e.dimension())
+            {
+                res = aggregate_impl(dim + 1, typename O::keep_dims());
+            }
+        }
+        return res;
+    }
+
+    // Returns a stepper at the beginning of the underlying expression.
+    template <class F, class CT, class X, class O>
+    inline auto xreducer_stepper<F, CT, X, O>::get_substepper_begin() const -> substepper_type
+    {
+        return m_reducer->m_e.stepper_begin(m_reducer->m_e.shape());
+    }
+
+    // Maps a result dimension to the corresponding input dimension.
+    template <class F, class CT, class X, class O>
+    inline auto xreducer_stepper<F, CT, X, O>::get_dim(size_type dim) const noexcept -> size_type
+    {
+        return m_reducer->m_dim_mapping[dim];
+    }
+
+    // Extent of input dimension i.
+    template <class F, class CT, class X, class O>
+    inline auto xreducer_stepper<F, CT, X, O>::shape(size_type i) const noexcept -> size_type
+    {
+        return m_reducer->m_e.shape()[i];
+    }
+
+    // The i-th reducing axis.
+    template <class F, class CT, class X, class O>
+    inline auto xreducer_stepper<F, CT, X, O>::axis(size_type i) const noexcept -> size_type
+    {
+        return m_reducer->m_axes[i];
+    }
+}
+
+#endif

+ 705 - 0
3rd/numpy/include/xtensor/xrepeat.hpp

@@ -0,0 +1,705 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_XREPEAT
+#define XTENSOR_XREPEAT
+
+#include <utility>
+#include <vector>
+
+#include "xaccessible.hpp"
+#include "xexpression.hpp"
+#include "xiterable.hpp"
+
+namespace xt
+{
+    template <class CT, class R>
+    class xrepeat;
+
+    template <class S, class R>
+    class xrepeat_stepper;
+
+    /*********************
+     * xrepeat extension *
+     *********************/
+
+    namespace extension
+    {
+        // Selects the extension base of xrepeat from the expression tag of CT;
+        // plain tensor expressions get an empty base.
+        template <class Tag, class CT, class X>
+        struct xrepeat_base_impl;
+
+        template <class CT, class X>
+        struct xrepeat_base_impl<xtensor_expression_tag, CT, X>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class CT, class X>
+        struct xrepeat_base : xrepeat_base_impl<xexpression_tag_t<CT>, CT, X>
+        {
+        };
+
+        template <class CT, class X>
+        using xrepeat_base_t = typename xrepeat_base<CT, X>::type;
+    }
+
+    /***********
+     * xrepeat *
+     ***********/
+
+    // Inner types for the container interface of xrepeat. Both reference types
+    // are const references: an xrepeat expression is read-only.
+    template <class CT, class R>
+    struct xcontainer_inner_types<xrepeat<CT, R>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using reference = typename xexpression_type::const_reference;
+        using const_reference = typename xexpression_type::const_reference;
+        using size_type = typename xexpression_type::size_type;
+        using temporary_type = typename xexpression_type::temporary_type;
+
+        static constexpr bool is_const = std::is_const<std::remove_reference_t<CT>>::value;
+
+        using extract_storage_type = xtl::mpl::eval_if_t<
+            has_data_interface<xexpression_type>,
+            detail::expr_storage_type<xexpression_type>,
+            make_invalid_type<>>;
+        using storage_type = std::conditional_t<is_const, const extract_storage_type, extract_storage_type>;
+    };
+
+    // Inner types for the iterable interface: xrepeat exposes only const
+    // steppers, wrapped into xrepeat_stepper to account for the repetitions.
+    template <class CT, class R>
+    struct xiterable_inner_types<xrepeat<CT, R>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using repeats_type = std::decay_t<R>;
+        using inner_shape_type = typename xexpression_type::inner_shape_type;
+        using const_stepper = xrepeat_stepper<typename xexpression_type::const_stepper, repeats_type>;
+        using stepper = const_stepper;
+    };
+
+    /**
+     * @class xrepeat
+     * @brief Expression with repeated values along an axis.
+     *
+     * The xrepeat class implements the repetition of the elements of
+     * an \ref xexpression along a given axis. xrepeat is not meant
+     * to be used directly, but only with the \ref repeat helper
+     * functions.
+     *
+     * @tparam CT the closure type of the repeated expression
+     * @tparam R the type of the container holding the repetition counts
+     *
+     * @sa repeat
+     */
+    template <class CT, class R>
+    class xrepeat : public xconst_iterable<xrepeat<CT, R>>,
+                    public xconst_accessible<xrepeat<CT, R>>,
+                    public xsharable_expression<xrepeat<CT, R>>,
+                    public extension::xrepeat_base_t<CT, R>
+    {
+    public:
+
+        using self_type = xrepeat<CT, R>;
+        using xexpression_type = std::decay_t<CT>;
+        using accessible_base = xconst_accessible<self_type>;
+        using extension_base = extension::xrepeat_base_t<CT, R>;
+        using expression_tag = typename extension_base::expression_tag;
+
+        using value_type = typename xexpression_type::value_type;
+        using shape_type = typename xexpression_type::shape_type;
+        using repeats_type = xtl::const_closure_type_t<R>;
+
+        using container_type = xcontainer_inner_types<xrepeat<CT, R>>;
+        using reference = typename container_type::reference;
+        using const_reference = typename container_type::const_reference;
+        using size_type = typename container_type::size_type;
+        using temporary_type = typename container_type::temporary_type;
+
+        static constexpr layout_type static_layout = xexpression_type::static_layout;
+        static constexpr bool contiguous_layout = false;
+
+        using bool_load_type = typename xexpression_type::bool_load_type;
+        using pointer = typename xexpression_type::pointer;
+        using const_pointer = typename xexpression_type::const_pointer;
+        using difference_type = typename xexpression_type::difference_type;
+
+        using iterable_type = xiterable<xrepeat<CT, R>>;
+        using stepper = typename iterable_type::stepper;
+        using const_stepper = typename iterable_type::const_stepper;
+
+        // NOTE(review): `repeats` is declared as R&& (R is a fixed template
+        // parameter, not deduced here), so callers must pass an rvalue of type R.
+        template <class CTA>
+        explicit xrepeat(CTA&& e, R&& repeats, size_type axis);
+
+        using accessible_base::size;
+        const shape_type& shape() const noexcept;
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+        using accessible_base::shape;
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        const xexpression_type& expression() const noexcept;
+
+        template <class S>
+        bool broadcast_shape(S& shape, bool reuse_cache = false) const;
+
+        template <class S>
+        bool has_linear_assign(const S& strides) const noexcept;
+
+        const_stepper stepper_begin() const;
+        const_stepper stepper_begin(const shape_type& s) const;
+
+        const_stepper stepper_end(layout_type l) const;
+        const_stepper stepper_end(const shape_type& s, layout_type l) const;
+
+    private:
+
+        CT m_e;  // closure on the repeated expression
+        size_type m_repeating_axis;  // axis along which elements are repeated
+        repeats_type m_repeats;  // repetition counts
+        shape_type m_shape;  // shape of the expression after repetition
+
+        const_reference access() const;
+
+        template <class Arg, class... Args>
+        const_reference access(Arg arg, Args... args) const;
+
+        template <std::size_t I, class Arg, class... Args>
+        const_reference access_impl(stepper&& s, Arg arg, Args... args) const;
+
+        template <std::size_t I>
+        const_reference access_impl(stepper&& s) const;
+    };
+
+    /*******************
+     * xrepeat_stepper *
+     *******************/
+
+    // Stepper over an xrepeat expression: wraps a stepper S of the underlying
+    // expression and only advances it once the required number of repetitions
+    // along the repeating axis has been consumed.
+    template <class S, class R>
+    class xrepeat_stepper
+    {
+    public:
+
+        using repeats_type = R;
+        using storage_type = typename S::storage_type;
+        using subiterator_type = typename S::subiterator_type;
+        using subiterator_traits = typename S::subiterator_traits;
+        using value_type = typename subiterator_traits::value_type;
+        using reference = typename subiterator_traits::reference;
+        using pointer = typename subiterator_traits::pointer;
+        using difference_type = typename subiterator_traits::difference_type;
+        using size_type = typename storage_type::size_type;
+        using shape_type = typename storage_type::shape_type;
+        using simd_value_type = xt_simd::simd_type<value_type>;
+
+        template <class requested_type>
+        using simd_return_type = xt_simd::simd_return_type<value_type, requested_type>;
+
+        xrepeat_stepper(S&& s, const shape_type& shape, const repeats_type& repeats, size_type axis);
+
+        reference operator*() const;
+
+        void step(size_type dim, size_type n = 1);
+        void step_back(size_type dim, size_type n = 1);
+        void reset(size_type dim);
+        void reset_back(size_type dim);
+
+        void to_begin();
+        void to_end(layout_type l);
+
+        template <class T>
+        simd_return_type<T> step_simd();
+
+        void step_leading();
+
+        template <class V>
+        void store_simd(const V& vec);
+
+    private:
+
+        // NOTE(review): m_shape and m_repeats are stored as references to data
+        // owned by the xrepeat expression — the stepper must not outlive it.
+        S m_substepper;  // stepper over the underlying (non-repeated) expression
+        const shape_type& m_shape;  // shape of the xrepeat expression
+
+        // presumably tracks progress within the current repetition run along the
+        // repeating axis — implementation is outside this view, confirm below
+        std::ptrdiff_t m_repeating_steps;
+        std::vector<size_type> m_positions;  // current multi-index of the stepper
+        size_type m_subposition;  // position of the substepper on the repeating axis
+
+        size_type m_repeating_axis;  // axis along which values are repeated
+        const repeats_type& m_repeats;  // repetition counts
+
+        void make_step(size_type dim, size_type n);
+        void make_step_back(size_type dim, size_type n);
+
+        std::vector<size_type> get_next_positions(size_type dim, size_type steps_to_go) const;
+        std::vector<size_type> get_next_positions_back(size_type dim, size_type steps_to_go) const;
+    };
+
+    /**************************
+     * xrepeat implementation *
+     **************************/
+
+    /**
+     * Constructs an xrepeat expression repeating the element of the specified
+     * \ref xexpression.
+     *
+     * @param e the input expression
+     * @param repeats The number of repetitions for each elements
+     * @param axis The axis along which to repeat the value
+     */
+    template <class CT, class R>
+    template <class CTA>
+    xrepeat<CT, R>::xrepeat(CTA&& e, R&& repeats, size_type axis)
+        : m_e(std::forward<CTA>(e))
+        , m_repeating_axis(axis)
+        , m_repeats(std::forward<R>(repeats))
+        , m_shape(e.shape())
+    {
+        using shape_value_type = typename shape_type::value_type;
+        m_shape[axis] = static_cast<shape_value_type>(
+            std::accumulate(m_repeats.begin(), m_repeats.end(), shape_value_type(0))
+        );
+    }
+
+    /**
+     * @name Size and shape
+     */
+    //@{
    /**
     * Returns the shape of the expression; identical to the shape of the
     * underlying expression except along the repeating axis, whose extent
     * is the sum of the repeat counts.
     */
    template <class CT, class R>
    inline auto xrepeat<CT, R>::shape() const noexcept -> const shape_type&
    {
        return m_shape;
    }
+
    /**
     * Returns the layout_type of the expression, forwarded from the
     * underlying expression.
     */
    template <class CT, class R>
    inline auto xrepeat<CT, R>::layout() const noexcept -> layout_type
    {
        return m_e.layout();
    }
+
    // A repeated view is never contiguous in memory: elements of the
    // underlying expression are visited multiple times.
    template <class CT, class R>
    inline bool xrepeat<CT, R>::is_contiguous() const noexcept
    {
        return false;
    }
+
+    //@}
+
    /**
     * @name Data
     */
    //@{
    /**
     * Returns a constant reference to the element at the specified position in the expression.
     * @param args a list of indices specifying the position in the function. Indices
     * must be unsigned integers, the number of indices should be equal or greater than
     * the number of dimensions of the expression.
     */
    template <class CT, class R>
    template <class... Args>
    inline auto xrepeat<CT, R>::operator()(Args... args) const -> const_reference
    {
        return access(args...);
    }
+
    /**
     * Returns a constant reference to the element at the specified position in the expression.
     * @param args a list of indices specifying the position in the expression. Indices
     * must be unsigned integers, the number of indices must be equal to the number of
     * dimensions of the expression, else the behavior is undefined.
     *
     * @warning This method is meant for performance, for expressions with a dynamic
     * number of dimensions (i.e. not known at compile time). Since it may have
     * undefined behavior (see parameters), operator() should be preferred whenever
     * it is possible.
     * @warning This method is NOT compatible with broadcasting, meaning the following
     * code has undefined behavior:
     * @code{.cpp}
     * xt::xarray<double> a = {{0, 1}, {2, 3}};
     * xt::xarray<double> b = {0, 1};
     * auto fd = a + b;
     * double res = fd.unchecked(0, 1);
     * @endcode
     */
    template <class CT, class R>
    template <class... Args>
    inline auto xrepeat<CT, R>::unchecked(Args... args) const -> const_reference
    {
        // No bounds checking is performed here; indexing is delegated to
        // the checked access path via operator().
        return this->operator()(args...);
    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the view.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater than the the number
+     * of dimensions of the view..
+     */
+    template <class CT, class R>
+    template <class It>
+    inline auto xrepeat<CT, R>::element(It first, It last) const -> const_reference
+    {
+        auto s = stepper_begin(m_e.shape());
+        std::size_t dimension = 0;
+        auto iter = first;
+        while (iter != last)
+        {
+            s.step(dimension, *iter);
+            ++dimension;
+            ++iter;
+        }
+        return access_impl<0>(std::forward<stepper>(s));
+    }
+
    /**
     * Returns a constant reference to the underlying expression of the
     * repeat expression.
     */
    template <class CT, class R>
    inline auto xrepeat<CT, R>::expression() const noexcept -> const xexpression_type&
    {
        return m_e;
    }
+
+    //@}
+
    /**
     * @name Broadcasting
     */
    //@{
    /**
     * Broadcast the shape of the function to the specified parameter.
     * @param shape the result shape
     * @param reuse_cache parameter for internal optimization (unused here)
     * @return a boolean indicating whether the broadcasting is trivial
     */
    template <class CT, class R>
    template <class S>
    inline bool xrepeat<CT, R>::broadcast_shape(S& shape, bool) const
    {
        return xt::broadcast_shape(m_shape, shape);
    }
+
    /**
     * Checks whether the xrepeat can be linearly assigned to an expression
     * with the specified strides. Always false: repeated elements do not
     * map to a linear stride pattern.
     * @return a boolean indicating whether a linear assign is possible
     */
    template <class CT, class R>
    template <class S>
    inline bool xrepeat<CT, R>::has_linear_assign(const S&) const noexcept
    {
        return false;
    }
+
+    //@}
+
    // Zero-index access: dereference a stepper positioned at the beginning.
    template <class CT, class R>
    inline auto xrepeat<CT, R>::access() const -> const_reference
    {
        return access_impl<0>(stepper_begin(m_e.shape()));
    }
+
+    template <class CT, class R>
+    template <class Arg, class... Args>
+    inline auto xrepeat<CT, R>::access(Arg arg, Args... args) const -> const_reference
+    {
+        constexpr size_t number_of_arguments = 1 + sizeof...(Args);
+        if (number_of_arguments > this->dimension())
+        {
+            return access(args...);
+        }
+        return access_impl<0>(stepper_begin(m_e.shape()), arg, args...);
+    }
+
    // Convenience overload: begin stepper over the underlying expression's shape.
    template <class CT, class R>
    inline auto xrepeat<CT, R>::stepper_begin() const -> const_stepper
    {
        return stepper_begin(m_e.shape());
    }
+
    // Builds a repeat stepper wrapping the underlying expression's begin stepper.
    template <class CT, class R>
    inline auto xrepeat<CT, R>::stepper_begin(const shape_type& s) const -> const_stepper
    {
        return const_stepper(m_e.stepper_begin(s), m_shape, m_repeats, m_repeating_axis);
    }
+
    // Convenience overload: end stepper over the underlying expression's shape.
    template <class CT, class R>
    inline auto xrepeat<CT, R>::stepper_end(layout_type l) const -> const_stepper
    {
        return stepper_end(m_e.shape(), l);
    }
+
    // Builds a begin stepper and advances it past-the-end for layout l.
    template <class CT, class R>
    inline auto xrepeat<CT, R>::stepper_end(const shape_type& s, layout_type l) const -> const_stepper
    {
        auto st = const_stepper(m_e.stepper_begin(s), m_shape, m_repeats, m_repeating_axis);
        st.to_end(l);
        return st;
    }
+
    // Recursive case: step the stepper along dimension I by the first index,
    // then recurse with the remaining indices for dimension I + 1.
    template <class CT, class R>
    template <std::size_t I, class Arg, class... Args>
    inline auto xrepeat<CT, R>::access_impl(stepper&& s, Arg arg, Args... args) const -> const_reference
    {
        s.step(I, static_cast<size_type>(arg));
        return access_impl<I + 1>(std::forward<stepper>(s), args...);
    }
+
    // Base case: all indices consumed; dereference the positioned stepper.
    template <class CT, class R>
    template <std::size_t I>
    inline auto xrepeat<CT, R>::access_impl(stepper&& s) const -> const_reference
    {
        return *s;
    }
+
+    /**********************************
+     * xrepeat_stepper implementation *
+     **********************************/
+
    // Constructs a repeat stepper at the begin position: all positions zero,
    // no offset inside the first repeat block.
    template <class S, class R>
    xrepeat_stepper<S, R>::xrepeat_stepper(S&& s, const shape_type& shape, const repeats_type& repeats, size_type axis)
        : m_substepper(std::forward<S>(s))
        , m_shape(shape)
        , m_repeating_steps(0)
        , m_positions(shape.size())
        , m_subposition(0)
        , m_repeating_axis(axis)
        , m_repeats(repeats)
    {
    }
+
+    template <class S, class R>
+    inline auto xrepeat_stepper<S, R>::operator*() const -> reference
+    {
+        return m_substepper.operator*();
+    }
+
    // Advances the stepper along dimension dim by steps_to_go. If the move
    // would run past the end of that dimension, the full target position
    // (with carries propagated into lower dimensions) is recomputed and each
    // affected dimension is stepped to its new position; otherwise a plain
    // in-dimension step is taken.
    template <class S, class R>
    inline void xrepeat_stepper<S, R>::step(size_type dim, size_type steps_to_go)
    {
        if (m_positions[dim] + steps_to_go >= m_shape[dim])
        {
            const auto next_positions = get_next_positions(dim, steps_to_go);
            // dim itself may move forward (wrap within an oversized step) or
            // backward (wrap-around to a smaller index).
            if (next_positions[dim] > m_positions[dim])
            {
                make_step(dim, next_positions[dim] - m_positions[dim]);
            }
            else
            {
                make_step_back(dim, m_positions[dim] - next_positions[dim]);
            }
            // Apply the carry to every lower dimension.
            // NOTE(review): assumes next_positions[d] >= m_positions[d] for
            // d < dim on this forward path — confirm against get_next_positions.
            for (size_type d = 0; d < dim; ++d)
            {
                make_step(d, next_positions[d] - m_positions[d]);
            }
        }
        else
        {
            make_step(dim, steps_to_go);
        }
    }
+
    // Mirror of step(): moves backwards along dimension dim by steps_to_go,
    // recomputing the full target position with borrows into lower
    // dimensions when the move would run before the start of the dimension.
    template <class S, class R>
    inline void xrepeat_stepper<S, R>::step_back(size_type dim, size_type steps_to_go)
    {
        if (m_positions[dim] < steps_to_go)
        {
            const auto next_positions = get_next_positions_back(dim, steps_to_go);
            if (next_positions[dim] < m_positions[dim])
            {
                make_step_back(dim, m_positions[dim] - next_positions[dim]);
            }
            else
            {
                make_step(dim, next_positions[dim] - m_positions[dim]);
            }
            // Apply the borrow to every lower dimension.
            for (size_type d = 0; d < dim; ++d)
            {
                make_step_back(d, m_positions[d] - next_positions[d]);
            }
        }
        else
        {
            make_step_back(dim, steps_to_go);
        }
    }
+
    // Resets dimension dim to position 0; on the repeating axis this also
    // rewinds the repeat-block bookkeeping.
    template <class S, class R>
    inline void xrepeat_stepper<S, R>::reset(size_type dim)
    {
        m_substepper.reset(dim);
        m_positions[dim] = 0;
        if (dim == m_repeating_axis)
        {
            m_subposition = 0;
            m_repeating_steps = 0;
        }
    }
+
    // Resets dimension dim to its last position; on the repeating axis the
    // repeat bookkeeping points at the last repetition of the last element.
    template <class S, class R>
    inline void xrepeat_stepper<S, R>::reset_back(size_type dim)
    {
        m_substepper.reset_back(dim);
        m_positions[dim] = m_shape[dim] - 1;
        if (dim == m_repeating_axis)
        {
            m_subposition = m_repeats.size() - 1;
            m_repeating_steps = static_cast<std::ptrdiff_t>(m_repeats.back()) - 1;
        }
    }
+
    // Moves the stepper to the global begin position (all indices zero).
    template <class S, class R>
    inline void xrepeat_stepper<S, R>::to_begin()
    {
        m_substepper.to_begin();
        std::fill(m_positions.begin(), m_positions.end(), 0);
        m_subposition = 0;
        m_repeating_steps = 0;
    }
+
    // Moves the stepper to the past-the-end position for layout l: every
    // position is set to its last index, then the index of the dimension
    // that varies slowest in layout l is bumped one past the end.
    template <class S, class R>
    inline void xrepeat_stepper<S, R>::to_end(layout_type l)
    {
        m_substepper.to_end(l);
        std::transform(
            m_shape.begin(),
            m_shape.end(),
            m_positions.begin(),
            [](auto value)
            {
                return value - 1;
            }
        );
        if (layout_type::row_major == l)
        {
            ++m_positions.front();
        }
        else
        {
            ++m_positions.back();
        }
        // Past-the-end on the repeating axis: one past the last repeat block.
        m_subposition = m_repeats.size();
        m_repeating_steps = 0;
    }
+
    // Steps once along the innermost (last) dimension.
    template <class S, class R>
    inline void xrepeat_stepper<S, R>::step_leading()
    {
        step(m_shape.size() - 1, 1);
    }
+
    // Performs an unchecked forward step of steps_to_go along dim. On the
    // repeating axis the step is translated into substepper steps: the
    // offset m_repeating_steps is advanced and repeat blocks are consumed
    // until the offset falls inside the current block; the substepper only
    // moves by the number of blocks crossed.
    template <class S, class R>
    inline void xrepeat_stepper<S, R>::make_step(size_type dim, size_type steps_to_go)
    {
        if (steps_to_go > 0)
        {
            if (dim == m_repeating_axis)
            {
                size_type subposition = m_subposition;
                m_repeating_steps += static_cast<std::ptrdiff_t>(steps_to_go);
                while (m_repeating_steps >= static_cast<ptrdiff_t>(m_repeats[subposition]))
                {
                    m_repeating_steps -= static_cast<ptrdiff_t>(m_repeats[subposition]);
                    ++subposition;
                }
                m_substepper.step(dim, subposition - m_subposition);
                m_subposition = subposition;
            }
            else
            {
                m_substepper.step(dim, steps_to_go);
            }
            m_positions[dim] += steps_to_go;
        }
    }
+
    // Mirror of make_step(): unchecked backward step of steps_to_go along
    // dim, walking repeat blocks backwards on the repeating axis until the
    // offset m_repeating_steps is non-negative again.
    template <class S, class R>
    inline void xrepeat_stepper<S, R>::make_step_back(size_type dim, size_type steps_to_go)
    {
        if (steps_to_go > 0)
        {
            if (dim == m_repeating_axis)
            {
                size_type subposition = m_subposition;
                m_repeating_steps -= static_cast<std::ptrdiff_t>(steps_to_go);
                while (m_repeating_steps < 0)
                {
                    --subposition;
                    m_repeating_steps += static_cast<ptrdiff_t>(m_repeats[subposition]);
                }
                m_substepper.step_back(dim, m_subposition - subposition);
                m_subposition = subposition;
            }
            else
            {
                m_substepper.step_back(dim, steps_to_go);
            }
            m_positions[dim] -= steps_to_go;
        }
    }
+
    // Computes the position vector reached after advancing dimension dim by
    // steps_to_go, propagating carries into dimension dim - 1 recursively
    // (each wrap of m_shape[dim] becomes one step in the previous dimension).
    template <class S, class R>
    inline auto xrepeat_stepper<S, R>::get_next_positions(size_type dim, size_type steps_to_go) const
        -> std::vector<size_type>
    {
        size_type next_position_for_dim = m_positions[dim] + steps_to_go;
        if (dim > 0)
        {
            // Count how many times we wrap around this dimension.
            size_type steps_in_previous_dim = 0;
            while (next_position_for_dim >= m_shape[dim])
            {
                next_position_for_dim -= m_shape[dim];
                ++steps_in_previous_dim;
            }
            if (steps_in_previous_dim > 0)
            {
                auto next_positions = get_next_positions(dim - 1, steps_in_previous_dim);
                next_positions[dim] = next_position_for_dim;
                return next_positions;
            }
        }
        // No carry: only this dimension's position changes.
        std::vector<size_type> next_positions = m_positions;
        next_positions[dim] = next_position_for_dim;
        return next_positions;
    }
+
    // Mirror of get_next_positions(): computes the position vector reached
    // after stepping dimension dim back by steps_to_go, propagating borrows
    // into dimension dim - 1 recursively.
    template <class S, class R>
    inline auto xrepeat_stepper<S, R>::get_next_positions_back(size_type dim, size_type steps_to_go) const
        -> std::vector<size_type>
    {
        // Signed intermediate: the position may go negative before borrowing.
        auto next_position_for_dim = static_cast<std::ptrdiff_t>(m_positions[dim] - steps_to_go);
        if (dim > 0)
        {
            size_type steps_in_previous_dim = 0;
            while (next_position_for_dim < 0)
            {
                next_position_for_dim += static_cast<std::ptrdiff_t>(m_shape[dim]);
                ++steps_in_previous_dim;
            }
            if (steps_in_previous_dim > 0)
            {
                auto next_positions = get_next_positions_back(dim - 1, steps_in_previous_dim);
                next_positions[dim] = static_cast<size_type>(next_position_for_dim);
                return next_positions;
            }
        }
        std::vector<size_type> next_positions = m_positions;
        next_positions[dim] = static_cast<size_type>(next_position_for_dim);
        return next_positions;
    }
+}
+
+#endif

+ 1098 - 0
3rd/numpy/include/xtensor/xscalar.hpp

@@ -0,0 +1,1098 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_SCALAR_HPP
+#define XTENSOR_SCALAR_HPP
+
+#include <array>
+#include <cstddef>
+#include <utility>
+
+#include <xtl/xtype_traits.hpp>
+
+#include "xaccessible.hpp"
+#include "xexpression.hpp"
+#include "xiterable.hpp"
+#include "xlayout.hpp"
+#include "xtensor_simd.hpp"
+
+namespace xt
+{
+
+    /*********************
+     * xscalar extension *
+     *********************/
+
    namespace extension
    {
        // Selects the extension base class of xscalar from the expression
        // tag of the wrapped type; the default xtensor tag contributes an
        // empty base.
        template <class Tag, class CT>
        struct xscalar_base_impl;

        template <class CT>
        struct xscalar_base_impl<xtensor_expression_tag, CT>
        {
            using type = xtensor_empty_base;
        };

        template <class CT>
        struct xscalar_base : xscalar_base_impl<get_expression_tag_t<std::decay_t<CT>>, CT>
        {
        };

        template <class CT>
        using xscalar_base_t = typename xscalar_base<CT>::type;
    }
+
+    /***********
+     * xscalar *
+     ***********/
+
    // xscalar is a cheap wrapper for a scalar value as an xexpression.
    template <class CT>
    class xscalar;

    template <bool is_const, class CT>
    class xscalar_stepper;

    template <bool is_const, class CT>
    class xdummy_iterator;

    // Iterable traits: an xscalar is 0-dimensional, hence the empty shape.
    template <class CT>
    struct xiterable_inner_types<xscalar<CT>>
    {
        using value_type = std::decay_t<CT>;
        using inner_shape_type = std::array<std::size_t, 0>;
        using shape_type = inner_shape_type;
        using const_stepper = xscalar_stepper<true, CT>;
        using stepper = xscalar_stepper<false, CT>;
    };

    // Container traits for xscalar.
    template <class CT>
    struct xcontainer_inner_types<xscalar<CT>>
    {
        using value_type = std::decay_t<CT>;
        using reference = value_type&;
        using const_reference = const value_type&;
        using size_type = std::size_t;
    };
+
    // Wraps a single (possibly referenced) scalar value of type CT as a
    // 0-dimensional xexpression, so that scalars can participate in
    // expression templates alongside tensors.
    template <class CT>
    class xscalar : public xsharable_expression<xscalar<CT>>,
                    private xiterable<xscalar<CT>>,
                    private xaccessible<xscalar<CT>>,
                    public extension::xscalar_base_t<CT>
    {
    public:

        using self_type = xscalar<CT>;
        using xexpression_type = std::decay_t<CT>;
        using extension_base = extension::xscalar_base_t<CT>;
        using accessible_base = xaccessible<self_type>;
        using expression_tag = typename extension_base::expression_tag;
        using inner_types = xcontainer_inner_types<self_type>;

        using value_type = typename inner_types::value_type;
        using reference = typename inner_types::reference;
        using const_reference = typename inner_types::const_reference;
        using pointer = value_type*;
        using const_pointer = const value_type*;
        using size_type = typename inner_types::size_type;
        using difference_type = std::ptrdiff_t;
        using simd_value_type = xt_simd::simd_type<value_type>;
        using bool_load_type = xt::bool_load_type<value_type>;

        using iterable_base = xiterable<self_type>;
        using inner_shape_type = typename iterable_base::inner_shape_type;
        using shape_type = inner_shape_type;

        using stepper = typename iterable_base::stepper;
        using const_stepper = typename iterable_base::const_stepper;

        template <layout_type L>
        using layout_iterator = typename iterable_base::template layout_iterator<L>;
        template <layout_type L>
        using const_layout_iterator = typename iterable_base::template const_layout_iterator<L>;

        template <layout_type L>
        using reverse_layout_iterator = typename iterable_base::template reverse_layout_iterator<L>;
        template <layout_type L>
        using const_reverse_layout_iterator = typename iterable_base::template const_reverse_layout_iterator<L>;

        template <class S, layout_type L>
        using broadcast_iterator = typename iterable_base::template broadcast_iterator<S, L>;
        template <class S, layout_type L>
        using const_broadcast_iterator = typename iterable_base::template const_broadcast_iterator<S, L>;

        template <class S, layout_type L>
        using reverse_broadcast_iterator = typename iterable_base::template reverse_broadcast_iterator<S, L>;
        template <class S, layout_type L>
        using const_reverse_broadcast_iterator = typename iterable_base::template const_reverse_broadcast_iterator<S, L>;

        // Linear iteration over a scalar is a plain pointer to the value.
        using iterator = value_type*;
        using const_iterator = const value_type*;
        using reverse_iterator = std::reverse_iterator<iterator>;
        using const_reverse_iterator = std::reverse_iterator<const_iterator>;

        using dummy_iterator = xdummy_iterator<false, CT>;
        using const_dummy_iterator = xdummy_iterator<true, CT>;

        // A scalar has no preferred layout and is trivially contiguous.
        static constexpr layout_type static_layout = layout_type::any;
        static constexpr bool contiguous_layout = true;

        xscalar() noexcept;
        xscalar(CT value) noexcept;

        // Implicit conversions back to the wrapped value.
        operator value_type&() noexcept;
        operator const value_type&() const noexcept;

        size_type size() const noexcept;
        const shape_type& shape() const noexcept;
        size_type shape(size_type i) const noexcept;
        layout_type layout() const noexcept;
        bool is_contiguous() const noexcept;
        using accessible_base::dimension;
        using accessible_base::shape;

        // All index arguments are ignored: every access yields the value.
        template <class... Args>
        reference operator()(Args...) noexcept;
        template <class... Args>
        reference unchecked(Args...) noexcept;

        template <class... Args>
        const_reference operator()(Args...) const noexcept;
        template <class... Args>
        const_reference unchecked(Args...) const noexcept;

        using accessible_base::at;
        using accessible_base::operator[];
        using accessible_base::back;
        using accessible_base::front;
        using accessible_base::in_bounds;
        using accessible_base::periodic;

        template <class It>
        reference element(It, It) noexcept;

        template <class It>
        const_reference element(It, It) const noexcept;

        xexpression_type& expression() noexcept;
        const xexpression_type& expression() const noexcept;

        template <class S>
        bool broadcast_shape(S& shape, bool reuse_cache = false) const noexcept;

        template <class S>
        bool has_linear_assign(const S& strides) const noexcept;

        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        iterator begin() noexcept;
        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        iterator end() noexcept;

        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_iterator begin() const noexcept;
        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_iterator end() const noexcept;
        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_iterator cbegin() const noexcept;
        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_iterator cend() const noexcept;

        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        reverse_iterator rbegin() noexcept;
        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        reverse_iterator rend() noexcept;

        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_reverse_iterator rbegin() const noexcept;
        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_reverse_iterator rend() const noexcept;
        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_reverse_iterator crbegin() const noexcept;
        template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_reverse_iterator crend() const noexcept;

        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        broadcast_iterator<S, L> begin(const S& shape) noexcept;
        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        broadcast_iterator<S, L> end(const S& shape) noexcept;

        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_broadcast_iterator<S, L> begin(const S& shape) const noexcept;
        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_broadcast_iterator<S, L> end(const S& shape) const noexcept;
        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_broadcast_iterator<S, L> cbegin(const S& shape) const noexcept;
        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_broadcast_iterator<S, L> cend(const S& shape) const noexcept;


        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        reverse_broadcast_iterator<S, L> rbegin(const S& shape) noexcept;
        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        reverse_broadcast_iterator<S, L> rend(const S& shape) noexcept;

        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_reverse_broadcast_iterator<S, L> rbegin(const S& shape) const noexcept;
        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_reverse_broadcast_iterator<S, L> rend(const S& shape) const noexcept;
        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_reverse_broadcast_iterator<S, L> crbegin(const S& shape) const noexcept;
        template <class S, layout_type L = XTENSOR_DEFAULT_TRAVERSAL>
        const_reverse_broadcast_iterator<S, L> crend(const S& shape) const noexcept;

        iterator linear_begin() noexcept;
        iterator linear_end() noexcept;

        const_iterator linear_begin() const noexcept;
        const_iterator linear_end() const noexcept;
        const_iterator linear_cbegin() const noexcept;
        const_iterator linear_cend() const noexcept;

        reverse_iterator linear_rbegin() noexcept;
        reverse_iterator linear_rend() noexcept;

        const_reverse_iterator linear_rbegin() const noexcept;
        const_reverse_iterator linear_rend() const noexcept;
        const_reverse_iterator linear_crbegin() const noexcept;
        const_reverse_iterator linear_crend() const noexcept;

        template <class S>
        stepper stepper_begin(const S& shape) noexcept;
        template <class S>
        stepper stepper_end(const S& shape, layout_type l) noexcept;

        template <class S>
        const_stepper stepper_begin(const S& shape) const noexcept;
        template <class S>
        const_stepper stepper_end(const S& shape, layout_type l) const noexcept;

        dummy_iterator dummy_begin() noexcept;
        dummy_iterator dummy_end() noexcept;

        const_dummy_iterator dummy_begin() const noexcept;
        const_dummy_iterator dummy_end() const noexcept;

        reference data_element(size_type i) noexcept;
        const_reference data_element(size_type i) const noexcept;

        reference flat(size_type i) noexcept;
        const_reference flat(size_type i) const noexcept;

        template <class align, class simd = simd_value_type>
        void store_simd(size_type i, const simd& e);
        template <class align, class requested_type = value_type, std::size_t N = xt_simd::simd_traits<requested_type>::size>
        xt_simd::simd_return_type<value_type, requested_type> load_simd(size_type i) const;

    private:

        // The wrapped value; CT may be a value type or a (const) reference.
        CT m_value;

        friend class xconst_iterable<self_type>;
        friend class xiterable<self_type>;
        friend class xaccessible<self_type>;
        friend class xconst_accessible<self_type>;
    };
+
    namespace detail
    {
        // Trait: true_type only for instantiations of xscalar.
        template <class E>
        struct is_xscalar_impl : std::false_type
        {
        };

        template <class E>
        struct is_xscalar_impl<xscalar<E>> : std::true_type
        {
        };
    }

    template <class E>
    using is_xscalar = detail::is_xscalar_impl<E>;

    namespace detail
    {
        // Trait: true when every type in E... decays to an xscalar.
        template <class... E>
        struct all_xscalar
        {
            static constexpr bool value = xtl::conjunction<is_xscalar<std::decay_t<E>>...>::value;
        };
    }

    // Note: MSVC bug workaround. Cannot just define
    // template <class... E>
    // using all_xscalar = xtl::conjunction<is_xscalar<std::decay_t<E>>...>;

    template <class... E>
    using all_xscalar = detail::all_xscalar<E...>;
+
    /******************
     * xref and xcref *
     ******************/

    // Wrap an lvalue as a (mutable) scalar expression by reference.
    template <class T>
    xscalar<T&> xref(T& t);

    // Wrap an lvalue as a const scalar expression by reference.
    template <class T>
    xscalar<const T&> xcref(T& t);
+
    /*******************
     * xscalar_stepper *
     *******************/

    // Stepper over an xscalar: all step operations are no-ops on position
    // since the expression is 0-dimensional; dereference yields the value.
    template <bool is_const, class CT>
    class xscalar_stepper
    {
    public:

        using self_type = xscalar_stepper<is_const, CT>;
        using storage_type = std::conditional_t<is_const, const xscalar<CT>, xscalar<CT>>;

        using value_type = typename storage_type::value_type;
        using reference = std::
            conditional_t<is_const, typename storage_type::const_reference, typename storage_type::reference>;
        using pointer = std::conditional_t<is_const, typename storage_type::const_pointer, typename storage_type::pointer>;
        using size_type = typename storage_type::size_type;
        using difference_type = typename storage_type::difference_type;
        using shape_type = typename storage_type::shape_type;

        template <class requested_type>
        using simd_return_type = xt_simd::simd_return_type<value_type, requested_type>;

        xscalar_stepper(storage_type* c) noexcept;

        reference operator*() const noexcept;

        void step(size_type dim, size_type n = 1) noexcept;
        void step_back(size_type dim, size_type n = 1) noexcept;
        void reset(size_type dim) noexcept;
        void reset_back(size_type dim) noexcept;

        void to_begin() noexcept;
        void to_end(layout_type l) noexcept;

        template <class T>
        simd_return_type<T> step_simd();

        void step_leading();

    private:

        // Non-owning pointer to the scalar expression being stepped over.
        storage_type* p_c;
    };
+
+    /*******************
+     * xdummy_iterator *
+     *******************/
+
+    namespace detail
+    {
+        // Select the const or mutable reference/pointer types of xscalar<CT>
+        // depending on the is_const flag.
+        template <bool is_const, class CT>
+        using dummy_reference_t = std::
+            conditional_t<is_const, typename xscalar<CT>::const_reference, typename xscalar<CT>::reference>;
+
+        template <bool is_const, class CT>
+        using dummy_pointer_t = std::
+            conditional_t<is_const, typename xscalar<CT>::const_pointer, typename xscalar<CT>::pointer>;
+    }
+
+    // Random-access iterator over an xscalar. As the expression has a single
+    // element, increment/decrement/advance never change the position (see the
+    // no-op implementations at the end of this file); two iterators compare
+    // equal when they point to the same xscalar.
+    template <bool is_const, class CT>
+    class xdummy_iterator : public xtl::xrandom_access_iterator_base<
+                                xdummy_iterator<is_const, CT>,
+                                typename xscalar<CT>::value_type,
+                                typename xscalar<CT>::difference_type,
+                                detail::dummy_pointer_t<is_const, CT>,
+                                detail::dummy_reference_t<is_const, CT>>
+    {
+    public:
+
+        using self_type = xdummy_iterator<is_const, CT>;
+        using storage_type = std::conditional_t<is_const, const xscalar<CT>, xscalar<CT>>;
+
+        using value_type = typename storage_type::value_type;
+        using reference = detail::dummy_reference_t<is_const, CT>;
+        using pointer = detail::dummy_pointer_t<is_const, CT>;
+        using difference_type = typename storage_type::difference_type;
+        using iterator_category = std::random_access_iterator_tag;
+
+        explicit xdummy_iterator(storage_type* c) noexcept;
+
+        // All motion operators leave the iterator unchanged (single element).
+        self_type& operator++() noexcept;
+        self_type& operator--() noexcept;
+
+        self_type& operator+=(difference_type n) noexcept;
+        self_type& operator-=(difference_type n) noexcept;
+
+        difference_type operator-(const self_type& rhs) const noexcept;
+
+        reference operator*() const noexcept;
+
+        bool equal(const self_type& rhs) const noexcept;
+        bool less_than(const self_type& rhs) const noexcept;
+
+    private:
+
+        storage_type* p_c;  // non-owning pointer to the underlying xscalar
+    };
+
+    template <bool is_const, class CT>
+    bool
+    operator==(const xdummy_iterator<is_const, CT>& lhs, const xdummy_iterator<is_const, CT>& rhs) noexcept;
+
+    template <bool is_const, class CT>
+    bool operator<(const xdummy_iterator<is_const, CT>& lhs, const xdummy_iterator<is_const, CT>& rhs) noexcept;
+
+    // Trait: true for every type except xdummy_iterator specializations.
+    template <class T>
+    struct is_not_xdummy_iterator : std::true_type
+    {
+    };
+
+    template <bool is_const, class CT>
+    struct is_not_xdummy_iterator<xdummy_iterator<is_const, CT>> : std::false_type
+    {
+    };
+
+    /*****************************
+     * linear_begin / linear_end *
+     *****************************/
+
+    // For scalars the linear iteration range is the dummy-iterator pair.
+    template <class CT>
+    XTENSOR_CONSTEXPR_RETURN auto linear_begin(xscalar<CT>& c) noexcept -> decltype(c.dummy_begin())
+    {
+        return c.dummy_begin();
+    }
+
+    template <class CT>
+    XTENSOR_CONSTEXPR_RETURN auto linear_end(xscalar<CT>& c) noexcept -> decltype(c.dummy_end())
+    {
+        return c.dummy_end();
+    }
+
+    template <class CT>
+    XTENSOR_CONSTEXPR_RETURN auto linear_begin(const xscalar<CT>& c) noexcept -> decltype(c.dummy_begin())
+    {
+        return c.dummy_begin();
+    }
+
+    template <class CT>
+    XTENSOR_CONSTEXPR_RETURN auto linear_end(const xscalar<CT>& c) noexcept -> decltype(c.dummy_end())
+    {
+        return c.dummy_end();
+    }
+
+    /**************************
+     * xscalar implementation *
+     **************************/
+
+    // This constructor will not compile when CT is a reference type.
+    template <class CT>
+    inline xscalar<CT>::xscalar() noexcept
+        : m_value()
+    {
+    }
+
+    template <class CT>
+    inline xscalar<CT>::xscalar(CT value) noexcept
+        : m_value(value)
+    {
+    }
+
+    // Implicit conversions to the wrapped value, so an xscalar can be used
+    // wherever the plain scalar is expected.
+    template <class CT>
+    inline xscalar<CT>::operator value_type&() noexcept
+    {
+        return m_value;
+    }
+
+    template <class CT>
+    inline xscalar<CT>::operator const value_type&() const noexcept
+    {
+        return m_value;
+    }
+
+    // A scalar is a 0-D expression holding exactly one element.
+    template <class CT>
+    inline auto xscalar<CT>::size() const noexcept -> size_type
+    {
+        return 1;
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::shape() const noexcept -> const shape_type&
+    {
+        // Function-local static empty shape, so it can be returned by reference.
+        static std::array<size_type, 0> zero_shape;
+        return zero_shape;
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::shape(size_type) const noexcept -> size_type
+    {
+        return 0;
+    }
+
+    template <class CT>
+    inline layout_type xscalar<CT>::layout() const noexcept
+    {
+        return static_layout;
+    }
+
+    template <class CT>
+    inline bool xscalar<CT>::is_contiguous() const noexcept
+    {
+        return true;
+    }
+
+    // Element access: whatever the index arguments, the single stored value
+    // is returned. The checked overloads validate the argument count against
+    // a 0-D shape in debug builds.
+    template <class CT>
+    template <class... Args>
+    inline auto xscalar<CT>::operator()(Args...) noexcept -> reference
+    {
+        XTENSOR_CHECK_DIMENSION((std::array<int, 0>()), Args()...);
+        return m_value;
+    }
+
+    template <class CT>
+    template <class... Args>
+    inline auto xscalar<CT>::unchecked(Args...) noexcept -> reference
+    {
+        return m_value;
+    }
+
+    template <class CT>
+    template <class... Args>
+    inline auto xscalar<CT>::operator()(Args...) const noexcept -> const_reference
+    {
+        XTENSOR_CHECK_DIMENSION((std::array<int, 0>()), Args()...);
+        return m_value;
+    }
+
+    template <class CT>
+    template <class... Args>
+    inline auto xscalar<CT>::unchecked(Args...) const noexcept -> const_reference
+    {
+        return m_value;
+    }
+
+    // Iterator-pair access: the index range is ignored.
+    template <class CT>
+    template <class It>
+    inline auto xscalar<CT>::element(It, It) noexcept -> reference
+    {
+        return m_value;
+    }
+
+    template <class CT>
+    template <class It>
+    inline auto xscalar<CT>::element(It, It) const noexcept -> const_reference
+    {
+        return m_value;
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::expression() noexcept -> xexpression_type&
+    {
+        return m_value;
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::expression() const noexcept -> const xexpression_type&
+    {
+        return m_value;
+    }
+
+    // A 0-D expression broadcasts to any shape without constraining it.
+    template <class CT>
+    template <class S>
+    inline bool xscalar<CT>::broadcast_shape(S&, bool) const noexcept
+    {
+        return true;
+    }
+
+    template <class CT>
+    template <class S>
+    inline bool xscalar<CT>::has_linear_assign(const S&) const noexcept
+    {
+        return true;
+    }
+
+    // STL-like iterator API: the scalar behaves as the one-element range
+    // [&m_value, &m_value + 1), regardless of the requested layout L.
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::begin() noexcept -> iterator
+    {
+        return &m_value;
+    }
+
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::end() noexcept -> iterator
+    {
+        return &m_value + 1;
+    }
+
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::begin() const noexcept -> const_iterator
+    {
+        return &m_value;
+    }
+
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::end() const noexcept -> const_iterator
+    {
+        return &m_value + 1;
+    }
+
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::cbegin() const noexcept -> const_iterator
+    {
+        return &m_value;
+    }
+
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::cend() const noexcept -> const_iterator
+    {
+        return &m_value + 1;
+    }
+
+    // Reverse iterators simply wrap the forward ones.
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::rbegin() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(end());
+    }
+
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::rend() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(begin());
+    }
+
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::rbegin() const noexcept -> const_reverse_iterator
+    {
+        return crbegin();
+    }
+
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::rend() const noexcept -> const_reverse_iterator
+    {
+        return crend();
+    }
+
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::crbegin() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(cend());
+    }
+
+    template <class CT>
+    template <layout_type L>
+    inline auto xscalar<CT>::crend() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(cbegin());
+    }
+
+    /*****************************
+     * Broadcasting iterator api *
+     *****************************/
+
+    // Shape-taking iterator factories: all forwarded to the xiterable base
+    // class implementation, which handles broadcasting the scalar to `shape`.
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::begin(const S& shape) noexcept -> broadcast_iterator<S, L>
+    {
+        return iterable_base::template begin<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::end(const S& shape) noexcept -> broadcast_iterator<S, L>
+    {
+        return iterable_base::template end<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::begin(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        return iterable_base::template begin<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::end(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        return iterable_base::template end<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::cbegin(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        return iterable_base::template cbegin<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::cend(const S& shape) const noexcept -> const_broadcast_iterator<S, L>
+    {
+        return iterable_base::template cend<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::rbegin(const S& shape) noexcept -> reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template rbegin<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::rend(const S& shape) noexcept -> reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template rend<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::rbegin(const S& shape) const noexcept -> const_reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template rbegin<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::rend(const S& shape) const noexcept -> const_reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template rend<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::crbegin(const S& shape) const noexcept -> const_reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template crbegin<S, L>(shape);
+    }
+
+    template <class CT>
+    template <class S, layout_type L>
+    inline auto xscalar<CT>::crend(const S& shape) const noexcept -> const_reverse_broadcast_iterator<S, L>
+    {
+        return iterable_base::template crend<S, L>(shape);
+    }
+
+    // Linear iteration API: forwards to the layout-templated iterators,
+    // instantiated with the library default layout.
+    template <class CT>
+    inline auto xscalar<CT>::linear_begin() noexcept -> iterator
+    {
+        return this->template begin<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_end() noexcept -> iterator
+    {
+        return this->template end<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_begin() const noexcept -> const_iterator
+    {
+        return this->template begin<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_end() const noexcept -> const_iterator
+    {
+        return this->template end<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_cbegin() const noexcept -> const_iterator
+    {
+        return this->template cbegin<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_cend() const noexcept -> const_iterator
+    {
+        return this->template cend<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_rbegin() noexcept -> reverse_iterator
+    {
+        return this->template rbegin<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_rend() noexcept -> reverse_iterator
+    {
+        return this->template rend<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_rbegin() const noexcept -> const_reverse_iterator
+    {
+        return this->template rbegin<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_rend() const noexcept -> const_reverse_iterator
+    {
+        return this->template rend<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_crbegin() const noexcept -> const_reverse_iterator
+    {
+        return this->template crbegin<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::linear_crend() const noexcept -> const_reverse_iterator
+    {
+        return this->template crend<XTENSOR_DEFAULT_LAYOUT>();
+    }
+
+    // Build a (mutable) stepper over this scalar; the shape argument is
+    // irrelevant for a 0-D expression and is ignored.
+    template <class CT>
+    template <class S>
+    inline auto xscalar<CT>::stepper_begin(const S&) noexcept -> stepper
+    {
+        // BUGFIX: xscalar_stepper's only constructor takes a single
+        // storage_type* argument (see its declaration above), so the former
+        // call stepper(this, false) could never compile once instantiated.
+        return stepper(this);
+    }
+
+    // Remaining stepper factories: shape and layout arguments are ignored,
+    // every stepper simply points at this scalar.
+    template <class CT>
+    template <class S>
+    inline auto xscalar<CT>::stepper_end(const S&, layout_type) noexcept -> stepper
+    {
+        return stepper(this);
+    }
+
+    template <class CT>
+    template <class S>
+    inline auto xscalar<CT>::stepper_begin(const S&) const noexcept -> const_stepper
+    {
+        return const_stepper(this);
+    }
+
+    template <class CT>
+    template <class S>
+    inline auto xscalar<CT>::stepper_end(const S&, layout_type) const noexcept -> const_stepper
+    {
+        return const_stepper(this);
+    }
+
+    // Dummy-iterator factories. Note that begin and end return iterators at
+    // the same position: the dummy iterator never advances, and comparison is
+    // by container pointer (see xdummy_iterator::equal below).
+    template <class CT>
+    inline auto xscalar<CT>::dummy_begin() noexcept -> dummy_iterator
+    {
+        return dummy_iterator(this);
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::dummy_end() noexcept -> dummy_iterator
+    {
+        return dummy_iterator(this);
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::dummy_begin() const noexcept -> const_dummy_iterator
+    {
+        return const_dummy_iterator(this);
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::dummy_end() const noexcept -> const_dummy_iterator
+    {
+        return const_dummy_iterator(this);
+    }
+
+    // Flat/raw access: the index is ignored, there is only one element.
+    template <class CT>
+    inline auto xscalar<CT>::data_element(size_type) noexcept -> reference
+    {
+        return m_value;
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::data_element(size_type) const noexcept -> const_reference
+    {
+        return m_value;
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::flat(size_type) noexcept -> reference
+    {
+        return m_value;
+    }
+
+    template <class CT>
+    inline auto xscalar<CT>::flat(size_type) const noexcept -> const_reference
+    {
+        return m_value;
+    }
+
+    // SIMD store keeps only lane 0, matching the single-element storage.
+    template <class CT>
+    template <class align, class simd>
+    inline void xscalar<CT>::store_simd(size_type, const simd& e)
+    {
+        m_value = static_cast<value_type>(e[0]);
+    }
+
+    // SIMD load broadcasts the scalar value to every lane of the batch.
+    template <class CT>
+    template <class align, class requested_type, std::size_t N>
+    inline auto xscalar<CT>::load_simd(size_type) const
+        -> xt_simd::simd_return_type<value_type, requested_type>
+    {
+        return xt_simd::broadcast_as<requested_type>(m_value);
+    }
+
+    // Factory helpers wrapping an lvalue as a (const) reference-holding xscalar.
+    template <class T>
+    inline xscalar<T&> xref(T& t)
+    {
+        return xscalar<T&>(t);
+    }
+
+    template <class T>
+    inline xscalar<const T&> xcref(T& t)
+    {
+        return xscalar<const T&>(t);
+    }
+
+    /**********************************
+     * xscalar_stepper implementation *
+     **********************************/
+
+    template <bool is_const, class CT>
+    inline xscalar_stepper<is_const, CT>::xscalar_stepper(storage_type* c) noexcept
+        : p_c(c)
+    {
+    }
+
+    // Dereference always yields the single wrapped value.
+    template <bool is_const, class CT>
+    inline auto xscalar_stepper<is_const, CT>::operator*() const noexcept -> reference
+    {
+        return p_c->operator()();
+    }
+
+    // All positioning operations are intentionally empty: a 0-D expression
+    // has no dimension to step along.
+    template <bool is_const, class CT>
+    inline void xscalar_stepper<is_const, CT>::step(size_type /*dim*/, size_type /*n*/) noexcept
+    {
+    }
+
+    template <bool is_const, class CT>
+    inline void xscalar_stepper<is_const, CT>::step_back(size_type /*dim*/, size_type /*n*/) noexcept
+    {
+    }
+
+    template <bool is_const, class CT>
+    inline void xscalar_stepper<is_const, CT>::reset(size_type /*dim*/) noexcept
+    {
+    }
+
+    template <bool is_const, class CT>
+    inline void xscalar_stepper<is_const, CT>::reset_back(size_type /*dim*/) noexcept
+    {
+    }
+
+    template <bool is_const, class CT>
+    inline void xscalar_stepper<is_const, CT>::to_begin() noexcept
+    {
+    }
+
+    template <bool is_const, class CT>
+    inline void xscalar_stepper<is_const, CT>::to_end(layout_type /*l*/) noexcept
+    {
+    }
+
+    // SIMD step: return a batch filled from the scalar value; no advance.
+    template <bool is_const, class CT>
+    template <class T>
+    inline auto xscalar_stepper<is_const, CT>::step_simd() -> simd_return_type<T>
+    {
+        return simd_return_type<T>(p_c->operator()());
+    }
+
+    template <bool is_const, class CT>
+    inline void xscalar_stepper<is_const, CT>::step_leading()
+    {
+    }
+
+    /**********************************
+     * xdummy_iterator implementation *
+     **********************************/
+
+    template <bool is_const, class CT>
+    inline xdummy_iterator<is_const, CT>::xdummy_iterator(storage_type* c) noexcept
+        : p_c(c)
+    {
+    }
+
+    // Motion operators never change the position: there is a single element.
+    template <bool is_const, class CT>
+    inline auto xdummy_iterator<is_const, CT>::operator++() noexcept -> self_type&
+    {
+        return *this;
+    }
+
+    template <bool is_const, class CT>
+    inline auto xdummy_iterator<is_const, CT>::operator--() noexcept -> self_type&
+    {
+        return *this;
+    }
+
+    template <bool is_const, class CT>
+    inline auto xdummy_iterator<is_const, CT>::operator+=(difference_type) noexcept -> self_type&
+    {
+        return *this;
+    }
+
+    template <bool is_const, class CT>
+    inline auto xdummy_iterator<is_const, CT>::operator-=(difference_type) noexcept -> self_type&
+    {
+        return *this;
+    }
+
+    // The distance between two dummy iterators is always zero.
+    template <bool is_const, class CT>
+    inline auto xdummy_iterator<is_const, CT>::operator-(const self_type&) const noexcept -> difference_type
+    {
+        return 0;
+    }
+
+    template <bool is_const, class CT>
+    inline auto xdummy_iterator<is_const, CT>::operator*() const noexcept -> reference
+    {
+        return p_c->operator()();
+    }
+
+    // Comparison is by the address of the underlying xscalar.
+    template <bool is_const, class CT>
+    inline bool xdummy_iterator<is_const, CT>::equal(const self_type& rhs) const noexcept
+    {
+        return p_c == rhs.p_c;
+    }
+
+    template <bool is_const, class CT>
+    inline bool xdummy_iterator<is_const, CT>::less_than(const self_type& rhs) const noexcept
+    {
+        return p_c < rhs.p_c;
+    }
+
+    template <bool is_const, class CT>
+    inline bool
+    operator==(const xdummy_iterator<is_const, CT>& lhs, const xdummy_iterator<is_const, CT>& rhs) noexcept
+    {
+        return lhs.equal(rhs);
+    }
+
+    template <bool is_const, class CT>
+    inline bool
+    operator<(const xdummy_iterator<is_const, CT>& lhs, const xdummy_iterator<is_const, CT>& rhs) noexcept
+    {
+        return lhs.less_than(rhs);
+    }
+}
+
+#endif

+ 796 - 0
3rd/numpy/include/xtensor/xsemantic.hpp

@@ -0,0 +1,796 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_SEMANTIC_HPP
+#define XTENSOR_SEMANTIC_HPP
+
+#include <functional>
+#include <utility>
+
+#include "xassign.hpp"
+#include "xexpression_traits.hpp"
+
+namespace xt
+{
+    namespace detail
+    {
+        // Whether an expression type may be wrapped in xsharable_expression.
+        // True by default; fixed containers/adaptors opt out through their
+        // SH template parameter.
+        template <class D>
+        struct is_sharable
+        {
+            static constexpr bool value = true;
+        };
+
+        template <class ET, class S, layout_type L, bool SH, class Tag>
+        struct is_sharable<xfixed_container<ET, S, L, SH, Tag>>
+        {
+            static constexpr bool value = SH;
+        };
+
+        template <class ET, class S, layout_type L, bool SH, class Tag>
+        struct is_sharable<xfixed_adaptor<ET, S, L, SH, Tag>>
+        {
+            static constexpr bool value = SH;
+        };
+    }
+
+    // Picks the CRTP expression base: sharable expressions get
+    // xsharable_expression, the rest plain xexpression.
+    template <class D>
+    using select_expression_base_t = std::
+        conditional_t<detail::is_sharable<D>::value, xsharable_expression<D>, xexpression<D>>;
+
+    /**
+     * @class xsemantic_base
+     * @brief Base interface for assignable xexpressions.
+     *
+     * The xsemantic_base class defines the interface for assignable
+     * xexpressions.
+     *
+     * @tparam D The derived type, i.e. the inheriting class for which xsemantic_base
+     *           provides the interface.
+     */
+    template <class D>
+    class xsemantic_base : public select_expression_base_t<D>
+    {
+    public:
+
+        using base_type = select_expression_base_t<D>;
+        using derived_type = typename base_type::derived_type;
+
+        using temporary_type = typename xcontainer_inner_types<D>::temporary_type;
+
+        // Compound assignment from a scalar; each overload is disabled when E
+        // is itself an xexpression (the xexpression overloads below apply then).
+        template <class E>
+        disable_xexpression<E, derived_type&> operator+=(const E&);
+
+        template <class E>
+        disable_xexpression<E, derived_type&> operator-=(const E&);
+
+        template <class E>
+        disable_xexpression<E, derived_type&> operator*=(const E&);
+
+        template <class E>
+        disable_xexpression<E, derived_type&> operator/=(const E&);
+
+        template <class E>
+        disable_xexpression<E, derived_type&> operator%=(const E&);
+
+        template <class E>
+        disable_xexpression<E, derived_type&> operator&=(const E&);
+
+        template <class E>
+        disable_xexpression<E, derived_type&> operator|=(const E&);
+
+        template <class E>
+        disable_xexpression<E, derived_type&> operator^=(const E&);
+
+        // Compound assignment from another xexpression.
+        template <class E>
+        derived_type& operator+=(const xexpression<E>&);
+
+        template <class E>
+        derived_type& operator-=(const xexpression<E>&);
+
+        template <class E>
+        derived_type& operator*=(const xexpression<E>&);
+
+        template <class E>
+        derived_type& operator/=(const xexpression<E>&);
+
+        template <class E>
+        derived_type& operator%=(const xexpression<E>&);
+
+        template <class E>
+        derived_type& operator&=(const xexpression<E>&);
+
+        template <class E>
+        derived_type& operator|=(const xexpression<E>&);
+
+        template <class E>
+        derived_type& operator^=(const xexpression<E>&);
+
+        // Named assignment interface.
+        template <class E>
+        derived_type& assign(const xexpression<E>&);
+
+        template <class E>
+        derived_type& plus_assign(const xexpression<E>&);
+
+        template <class E>
+        derived_type& minus_assign(const xexpression<E>&);
+
+        template <class E>
+        derived_type& multiplies_assign(const xexpression<E>&);
+
+        template <class E>
+        derived_type& divides_assign(const xexpression<E>&);
+
+        template <class E>
+        derived_type& modulus_assign(const xexpression<E>&);
+
+        template <class E>
+        derived_type& bit_and_assign(const xexpression<E>&);
+
+        template <class E>
+        derived_type& bit_or_assign(const xexpression<E>&);
+
+        template <class E>
+        derived_type& bit_xor_assign(const xexpression<E>&);
+
+    protected:
+
+        // Only constructible/assignable through the derived CRTP class.
+        xsemantic_base() = default;
+        ~xsemantic_base() = default;
+
+        xsemantic_base(const xsemantic_base&) = default;
+        xsemantic_base& operator=(const xsemantic_base&) = default;
+
+        xsemantic_base(xsemantic_base&&) = default;
+        xsemantic_base& operator=(xsemantic_base&&) = default;
+
+        template <class E>
+        derived_type& operator=(const xexpression<E>&);
+    };
+
+    // Trait detecting whether E derives from xsemantic_base (i.e. supports
+    // the assignment interface), with the matching enable_if helpers.
+    template <class E>
+    using is_assignable = is_crtp_base_of<xsemantic_base, E>;
+
+    template <class E, class R = void>
+    using enable_assignable = typename std::enable_if<is_assignable<E>::value, R>::type;
+
+    template <class E, class R = void>
+    using disable_assignable = typename std::enable_if<!is_assignable<E>::value, R>::type;
+
+    /**
+     * @class xcontainer_semantic
+     * @brief Implementation of the xsemantic_base interface
+     * for dense multidimensional containers.
+     *
+     * The xcontainer_semantic class is an implementation of the
+     * xsemantic_base interface for dense multidimensional
+     * containers.
+     *
+     * @tparam D the derived type
+     */
+    template <class D>
+    class xcontainer_semantic : public xsemantic_base<D>
+    {
+    public:
+
+        using base_type = xsemantic_base<D>;
+        using derived_type = D;
+        using temporary_type = typename base_type::temporary_type;
+
+        // Assignment entry points used by the xassign machinery.
+        derived_type& assign_temporary(temporary_type&&);
+
+        template <class E>
+        derived_type& assign_xexpression(const xexpression<E>& e);
+
+        template <class E>
+        derived_type& computed_assign(const xexpression<E>& e);
+
+        template <class E, class F>
+        derived_type& scalar_computed_assign(const E& e, F&& f);
+
+    protected:
+
+        xcontainer_semantic() = default;
+        ~xcontainer_semantic() = default;
+
+        xcontainer_semantic(const xcontainer_semantic&) = default;
+        xcontainer_semantic& operator=(const xcontainer_semantic&) = default;
+
+        xcontainer_semantic(xcontainer_semantic&&) = default;
+        xcontainer_semantic& operator=(xcontainer_semantic&&) = default;
+
+        template <class E>
+        derived_type& operator=(const xexpression<E>&);
+    };
+
+    // Trait and enable_if helpers for types with container semantics.
+    template <class E>
+    using has_container_semantics = is_crtp_base_of<xcontainer_semantic, E>;
+
+    template <class E, class R = void>
+    using enable_xcontainer_semantics = typename std::enable_if<has_container_semantics<E>::value, R>::type;
+
+    template <class E, class R = void>
+    using disable_xcontainer_semantics = typename std::enable_if<!has_container_semantics<E>::value, R>::type;
+
+
+    template <class D>
+    class xview_semantic;
+
+    // Overlap checking for view expressions that expose no memory address of
+    // their own: an empty view cannot overlap anything; otherwise the check
+    // recurses into the underlying expression.
+    template <class E>
+    struct overlapping_memory_checker_traits<
+        E,
+        std::enable_if_t<!has_memory_address<E>::value && is_crtp_base_of<xview_semantic, E>::value>>
+    {
+        static bool check_overlap(const E& expr, const memory_range& dst_range)
+        {
+            if (expr.size() == 0)
+            {
+                return false;
+            }
+            else
+            {
+                using ChildE = std::decay_t<decltype(expr.expression())>;
+                return overlapping_memory_checker_traits<ChildE>::check_overlap(expr.expression(), dst_range);
+            }
+        }
+    };
+
+    /**
+     * @class xview_semantic
+     * @brief Implementation of the xsemantic_base interface for
+     * multidimensional views
+     *
+     * The xview_semantic is an implementation of the xsemantic_base
+     * interface for multidimensional views.
+     *
+     * @tparam D the derived type
+     */
+    template <class D>
+    class xview_semantic : public xsemantic_base<D>
+    {
+    public:
+
+        using base_type = xsemantic_base<D>;
+        using derived_type = D;
+        using temporary_type = typename base_type::temporary_type;
+
+        // Assignment entry points used by the xassign machinery.
+        derived_type& assign_temporary(temporary_type&&);
+
+        template <class E>
+        derived_type& assign_xexpression(const xexpression<E>& e);
+
+        template <class E>
+        derived_type& computed_assign(const xexpression<E>& e);
+
+        template <class E, class F>
+        derived_type& scalar_computed_assign(const E& e, F&& f);
+
+    protected:
+
+        xview_semantic() = default;
+        ~xview_semantic() = default;
+
+        xview_semantic(const xview_semantic&) = default;
+        xview_semantic& operator=(const xview_semantic&) = default;
+
+        xview_semantic(xview_semantic&&) = default;
+        xview_semantic& operator=(xview_semantic&&) = default;
+
+        template <class E>
+        derived_type& operator=(const xexpression<E>&);
+    };
+
+    // Trait and enable_if helpers for types with view semantics.
+    template <class E>
+    using has_view_semantics = is_crtp_base_of<xview_semantic, E>;
+
+    template <class E, class R = void>
+    using enable_xview_semantics = typename std::enable_if<has_view_semantics<E>::value, R>::type;
+
+    template <class E, class R = void>
+    using disable_xview_semantics = typename std::enable_if<!has_view_semantics<E>::value, R>::type;
+
+    /*********************************
+     * xsemantic_base implementation *
+     *********************************/
+
+    /**
+     * @name Computed assignment
+     */
+    //@{
+    /**
+     * Adds the scalar \c e to \c *this.
+     * @param e the scalar to add.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator+=(const E& e) -> disable_xexpression<E, derived_type&>
+    {
+        return this->derived_cast().scalar_computed_assign(e, std::plus<>());
+    }
+
+    /**
+     * Subtracts the scalar \c e from \c *this.
+     * @param e the scalar to subtract.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator-=(const E& e) -> disable_xexpression<E, derived_type&>
+    {
+        return this->derived_cast().scalar_computed_assign(e, std::minus<>());
+    }
+
+    /**
+     * Multiplies \c *this with the scalar \c e.
+     * @param e the scalar involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator*=(const E& e) -> disable_xexpression<E, derived_type&>
+    {
+        return this->derived_cast().scalar_computed_assign(e, std::multiplies<>());
+    }
+
+    /**
+     * Divides \c *this by the scalar \c e.
+     * @param e the scalar involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator/=(const E& e) -> disable_xexpression<E, derived_type&>
+    {
+        return this->derived_cast().scalar_computed_assign(e, std::divides<>());
+    }
+
+    /**
+     * Computes the remainder of \c *this after division by the scalar \c e.
+     * @param e the scalar involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator%=(const E& e) -> disable_xexpression<E, derived_type&>
+    {
+        return this->derived_cast().scalar_computed_assign(e, std::modulus<>());
+    }
+
+    /**
+     * Computes the bitwise and of \c *this and the scalar \c e and assigns it to \c *this.
+     * @param e the scalar involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator&=(const E& e) -> disable_xexpression<E, derived_type&>
+    {
+        return this->derived_cast().scalar_computed_assign(e, std::bit_and<>());
+    }
+
+    /**
+     * Computes the bitwise or of \c *this and the scalar \c e and assigns it to \c *this.
+     * @param e the scalar involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator|=(const E& e) -> disable_xexpression<E, derived_type&>
+    {
+        return this->derived_cast().scalar_computed_assign(e, std::bit_or<>());
+    }
+
+    /**
+     * Computes the bitwise xor of \c *this and the scalar \c e and assigns it to \c *this.
+     * @param e the scalar involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator^=(const E& e) -> disable_xexpression<E, derived_type&>
+    {
+        return this->derived_cast().scalar_computed_assign(e, std::bit_xor<>());
+    }
+
+    /**
+     * Adds the xexpression \c e to \c *this.
+     * @param e the xexpression to add.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator+=(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast() = this->derived_cast() + e.derived_cast();
+    }
+
+    /**
+     * Subtracts the xexpression \c e from \c *this.
+     * @param e the xexpression to subtract.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator-=(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast() = this->derived_cast() - e.derived_cast();
+    }
+
+    /**
+     * Multiplies \c *this with the xexpression \c e.
+     * @param e the xexpression involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator*=(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast() = this->derived_cast() * e.derived_cast();
+    }
+
+    /**
+     * Divides \c *this by the xexpression \c e.
+     * @param e the xexpression involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator/=(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast() = this->derived_cast() / e.derived_cast();
+    }
+
+    /**
+     * Computes the remainder of \c *this after division by the xexpression \c e.
+     * @param e the xexpression involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator%=(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast() = this->derived_cast() % e.derived_cast();
+    }
+
+    /**
+     * Computes the bitwise and of \c *this and the xexpression \c e and assigns it to \c *this.
+     * @param e the xexpression involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator&=(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast() = this->derived_cast() & e.derived_cast();
+    }
+
+    /**
+     * Computes the bitwise or of \c *this and the xexpression \c e and assigns it to \c *this.
+     * @param e the xexpression involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator|=(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast() = this->derived_cast() | e.derived_cast();
+    }
+
+    /**
+     * Computes the bitwise xor of \c *this and the xexpression \c e and assigns it to \c *this.
+     * @param e the xexpression involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::operator^=(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast() = this->derived_cast() ^ e.derived_cast();
+    }
+
+    //@}
+
+    /**
+     * @name Assign functions
+     */
+
+    /**
+     * Assigns the xexpression \c e to \c *this. Ensures no temporary
+     * will be used to perform the assignment.
+     * @param e the xexpression to assign.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::assign(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast().assign_xexpression(e);
+    }
+
+    /**
+     * Adds the xexpression \c e to \c *this. Ensures no temporary
+     * will be used to perform the assignment.
+     * @param e the xexpression to add.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::plus_assign(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast().computed_assign(this->derived_cast() + e.derived_cast());
+    }
+
+    /**
+     * Subtracts the xexpression \c e to \c *this. Ensures no temporary
+     * will be used to perform the assignment.
+     * @param e the xexpression to subtract.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::minus_assign(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast().computed_assign(this->derived_cast() - e.derived_cast());
+    }
+
+    /**
+     * Multiplies \c *this with the xexpression \c e. Ensures no temporary
+     * will be used to perform the assignment.
+     * @param e the xexpression involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::multiplies_assign(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast().computed_assign(this->derived_cast() * e.derived_cast());
+    }
+
+    /**
+     * Divides \c *this by the xexpression \c e. Ensures no temporary
+     * will be used to perform the assignment.
+     * @param e the xexpression involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::divides_assign(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast().computed_assign(this->derived_cast() / e.derived_cast());
+    }
+
+    /**
+     * Computes the remainder of \c *this after division by the xexpression \c e.
+     * Ensures no temporary will be used to perform the assignment.
+     * @param e the xexpression involved in the operation.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::modulus_assign(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast().computed_assign(this->derived_cast() % e.derived_cast());
+    }
+
+    /**
+     * Computes the bitwise and of \c e to \c *this. Ensures no temporary
+     * will be used to perform the assignment.
+     * @param e the xexpression to add.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::bit_and_assign(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast().computed_assign(this->derived_cast() & e.derived_cast());
+    }
+
+    /**
+     * Computes the bitwise or of \c e to \c *this. Ensures no temporary
+     * will be used to perform the assignment.
+     * @param e the xexpression to add.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::bit_or_assign(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast().computed_assign(this->derived_cast() | e.derived_cast());
+    }
+
+    /**
+     * Computes the bitwise xor of \c e to \c *this. Ensures no temporary
+     * will be used to perform the assignment.
+     * @param e the xexpression to add.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    template <class E>
+    inline auto xsemantic_base<D>::bit_xor_assign(const xexpression<E>& e) -> derived_type&
+    {
+        return this->derived_cast().computed_assign(this->derived_cast() ^ e.derived_cast());
+    }
+
    /**
     * Assigns the xexpression \c e to \c *this, evaluating into a temporary
     * first when the right-hand side may alias the memory of \c *this.
     * @param e the xexpression to assign.
     * @return a reference to \c *this.
     */
    template <class D>
    template <class E>
    inline auto xsemantic_base<D>::operator=(const xexpression<E>& e) -> derived_type&
    {
#ifdef XTENSOR_FORCE_TEMPORARY_MEMORY_IN_ASSIGNMENTS
        // Opt-in safe mode: always evaluate the rhs into a temporary.
        temporary_type tmp(e);
        return this->derived_cast().assign_temporary(std::move(tmp));
#else
        auto&& this_derived = this->derived_cast();
        auto memory_checker = make_overlapping_memory_checker(this_derived);
        if (memory_checker.check_overlap(e.derived_cast()))
        {
            // Overlap detected: a temporary prevents reading elements that
            // were already overwritten during the assignment.
            temporary_type tmp(e);
            return this_derived.assign_temporary(std::move(tmp));
        }
        else
        {
            // No aliasing: assign directly, no temporary needed.
            return this->assign(e);
        }
#endif
    }
+
+    /**************************************
+     * xcontainer_semantic implementation *
+     **************************************/
+
+    /**
+     * Assigns the temporary \c tmp to \c *this.
+     * @param tmp the temporary to assign.
+     * @return a reference to \c *this.
+     */
+    template <class D>
+    inline auto xcontainer_semantic<D>::assign_temporary(temporary_type&& tmp) -> derived_type&
+    {
+        return (this->derived_cast() = std::move(tmp));
+    }
+
    /**
     * Assigns the xexpression \c e to the container.
     * @param e the xexpression to assign.
     * @return a reference to \c *this.
     */
    template <class D>
    template <class E>
    inline auto xcontainer_semantic<D>::assign_xexpression(const xexpression<E>& e) -> derived_type&
    {
        // Dispatch to the free-function assignment engine.
        xt::assign_xexpression(*this, e);
        return this->derived_cast();
    }
+
    /**
     * Assigns the computed xexpression \c e to the container.
     * @param e the xexpression to assign.
     * @return a reference to \c *this.
     */
    template <class D>
    template <class E>
    inline auto xcontainer_semantic<D>::computed_assign(const xexpression<E>& e) -> derived_type&
    {
        // Dispatch to the free-function assignment engine.
        xt::computed_assign(*this, e);
        return this->derived_cast();
    }
+
    /**
     * Applies the binary functor \c f with the scalar \c e to the container.
     * @param e the scalar operand.
     * @param f the binary functor to apply.
     * @return a reference to \c *this.
     */
    template <class D>
    template <class E, class F>
    inline auto xcontainer_semantic<D>::scalar_computed_assign(const E& e, F&& f) -> derived_type&
    {
        // Dispatch to the free-function assignment engine.
        xt::scalar_computed_assign(*this, e, std::forward<F>(f));
        return this->derived_cast();
    }
+
    template <class D>
    template <class E>
    inline auto xcontainer_semantic<D>::operator=(const xexpression<E>& e) -> derived_type&
    {
        // Delegates to xsemantic_base::operator=, which handles aliasing.
        return base_type::operator=(e);
    }
+
+    /*********************************
+     * xview_semantic implementation *
+     *********************************/
+
    /**
     * Assigns the temporary \c tmp to \c *this.
     * @param tmp the temporary to assign.
     * @return a reference to \c *this.
     */
    template <class D>
    inline auto xview_semantic<D>::assign_temporary(temporary_type&& tmp) -> derived_type&
    {
        // Delegate to the derived view's own implementation.
        this->derived_cast().assign_temporary_impl(std::move(tmp));
        return this->derived_cast();
    }
+
    namespace detail
    {
        // Fallback: a non-xfunction rhs is reported as trivially assignable.
        template <class F>
        bool get_rhs_triviality(const F&)
        {
            return true;
        }

        // For an xfunction rhs, the assignment is trivial only when
        // broadcasting its operands to the result shape is itself trivial.
        template <class F, class R, class... CT>
        bool get_rhs_triviality(const xfunction<F, R, CT...>& rhs)
        {
            using index_type = xindex_type_t<typename xfunction<F, R, CT...>::shape_type>;
            using size_type = typename index_type::size_type;
            size_type size = rhs.dimension();
            index_type shape = uninitialized_shape<index_type>(size);
            bool trivial_broadcast = rhs.broadcast_shape(shape, true);
            return trivial_broadcast;
        }
    }
+
    /**
     * Assigns the xexpression \c e to the view.
     * @param e the xexpression to assign.
     * @return a reference to \c *this.
     */
    template <class D>
    template <class E>
    inline auto xview_semantic<D>::assign_xexpression(const xexpression<E>& e) -> derived_type&
    {
        // A view cannot be resized, so the shapes must already be compatible.
        xt::assert_compatible_shape(*this, e);
        xt::assign_data(*this, e, detail::get_rhs_triviality(e.derived_cast()));
        return this->derived_cast();
    }
+
    /**
     * Assigns the computed xexpression \c e to the view.
     * @param e the xexpression to assign.
     * @return a reference to \c *this.
     */
    template <class D>
    template <class E>
    inline auto xview_semantic<D>::computed_assign(const xexpression<E>& e) -> derived_type&
    {
        // A view cannot be resized, so the shapes must already be compatible.
        xt::assert_compatible_shape(*this, e);
        xt::assign_data(*this, e, detail::get_rhs_triviality(e.derived_cast()));
        return this->derived_cast();
    }
+
    namespace xview_semantic_detail
    {
        // Contiguous views expose a fast linear iterator...
        template <class D>
        auto get_begin(D&& lhs, std::true_type)
        {
            return lhs.linear_begin();
        }

        // ...non-contiguous ones fall back to the generic stepping iterator.
        template <class D>
        auto get_begin(D&& lhs, std::false_type)
        {
            return lhs.begin();
        }
    }
+
    /**
     * Applies the binary functor \c f to each element of the view and the
     * scalar \c e, storing the result back into the element.
     * @param e the scalar operand.
     * @param f the binary functor to apply.
     * @return a reference to \c *this.
     */
    template <class D>
    template <class E, class F>
    inline auto xview_semantic<D>::scalar_computed_assign(const E& e, F&& f) -> derived_type&
    {
        D& d = this->derived_cast();

        using size_type = typename D::size_type;
        // Pick the linear iterator when the view's layout is contiguous.
        auto dst = xview_semantic_detail::get_begin(d, std::integral_constant<bool, D::contiguous_layout>());
        for (size_type i = d.size(); i > 0; --i)
        {
            *dst = f(*dst, e);
            ++dst;
        }
        return this->derived_cast();
    }
+
    template <class D>
    template <class E>
    inline auto xview_semantic<D>::operator=(const xexpression<E>& rhs) -> derived_type&
    {
        // A view cannot be resized: check whether rhs already has exactly
        // the view's shape (same rank and same extents).
        bool cond = (rhs.derived_cast().shape().size() == this->derived_cast().dimension())
                    && std::equal(
                        this->derived_cast().shape().begin(),
                        this->derived_cast().shape().end(),
                        rhs.derived_cast().shape().begin()
                    );

        if (!cond)
        {
            // Shapes differ: broadcast rhs to the view's shape first.
            base_type::operator=(broadcast(rhs.derived_cast(), this->derived_cast().shape()));
        }
        else
        {
            base_type::operator=(rhs);
        }
        return this->derived_cast();
    }
+}
+
+#endif

+ 213 - 0
3rd/numpy/include/xtensor/xset_operation.hpp

@@ -0,0 +1,213 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_XSET_OPERATION_HPP
+#define XTENSOR_XSET_OPERATION_HPP
+
#include <algorithm>
#include <functional>
#include <initializer_list>
#include <type_traits>
#include <vector>
+
+#include <xtl/xsequence.hpp>
+
+#include "xfunction.hpp"
+#include "xmath.hpp"
+#include "xscalar.hpp"
+#include "xstrided_view.hpp"
+#include "xstrides.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+
    namespace detail
    {

        // Builds the membership predicate used by isin/in1d.
        // When test_elements binds as an lvalue, the caller's object outlives
        // the call, so the lambda keeps only a reference to it.
        template <bool lvalue>
        struct lambda_isin
        {
            template <class E>
            static auto make(E&& e)
            {
                return [&e](const auto& t)
                {
                    return std::find(e.begin(), e.end(), t) != e.end();
                };
            }
        };

        // When test_elements is an rvalue a reference would dangle, so the
        // lambda stores its own copy instead.
        template <>
        struct lambda_isin<false>
        {
            template <class E>
            static auto make(E&& e)
            {
                return [e](const auto& t)
                {
                    return std::find(e.begin(), e.end(), t) != e.end();
                };
            }
        };

    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief isin
+     *
+     * Returns a boolean array of the same shape as ``element`` that is ``true`` where an element of
+     * ``element`` is in ``test_elements`` and ``False`` otherwise.
+     * @param element an \ref xexpression
+     * @param test_elements an array
+     * @return a boolean array
+     */
+    template <class E, class T>
+    inline auto isin(E&& element, std::initializer_list<T> test_elements) noexcept
+    {
+        auto lambda = [test_elements](const auto& t)
+        {
+            return std::find(test_elements.begin(), test_elements.end(), t) != test_elements.end();
+        };
+        return make_lambda_xfunction(std::move(lambda), std::forward<E>(element));
+    }
+
    /**
     * @ingroup logical_operators
     * @brief isin
     *
     * Returns a boolean array of the same shape as ``element`` that is ``true`` where an element of
     * ``element`` is in ``test_elements`` and ``False`` otherwise.
     * @param element an \ref xexpression
     * @param test_elements an array
     * @return a boolean array
     */
    template <class E, class F, class = typename std::enable_if_t<has_iterator_interface<F>::value>>
    inline auto isin(E&& element, F&& test_elements) noexcept
    {
        // lambda_isin keeps a reference to lvalue test_elements and a copy of
        // rvalue ones, so the returned lazy expression remains valid.
        auto lambda = detail::lambda_isin<std::is_lvalue_reference<F>::value>::make(std::forward<F>(test_elements
        ));
        return make_lambda_xfunction(std::move(lambda), std::forward<E>(element));
    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief isin
+     *
+     * Returns a boolean array of the same shape as ``element`` that is ``true`` where an element of
+     * ``element`` is in ``test_elements`` and ``False`` otherwise.
+     * @param element an \ref xexpression
+     * @param test_elements_begin iterator to the beginning of an array
+     * @param test_elements_end iterator to the end of an array
+     * @return a boolean array
+     */
+    template <class E, class I, class = typename std::enable_if_t<is_iterator<I>::value>>
+    inline auto isin(E&& element, I&& test_elements_begin, I&& test_elements_end) noexcept
+    {
+        auto lambda = [&test_elements_begin, &test_elements_end](const auto& t)
+        {
+            return std::find(test_elements_begin, test_elements_end, t) != test_elements_end;
+        };
+        return make_lambda_xfunction(std::move(lambda), std::forward<E>(element));
+    }
+
+    /**
+     * @ingroup logical_operators
+     * @brief in1d
+     *
+     * Returns a boolean array of the same shape as ``element`` that is ``true`` where an element of
+     * ``element`` is in ``test_elements`` and ``False`` otherwise.
+     * @param element an \ref xexpression
+     * @param test_elements an array
+     * @return a boolean array
+     */
+    template <class E, class T>
+    inline auto in1d(E&& element, std::initializer_list<T> test_elements) noexcept
+    {
+        XTENSOR_ASSERT(element.dimension() == 1ul);
+        return isin(std::forward<E>(element), std::forward<std::initializer_list<T>>(test_elements));
+    }
+
    /**
     * @ingroup logical_operators
     * @brief in1d
     *
     * Returns a boolean array of the same shape as ``element`` that is ``true`` where an element of
     * ``element`` is in ``test_elements`` and ``False`` otherwise.
     * @param element an \ref xexpression
     * @param test_elements an array
     * @return a boolean array
     */
    template <class E, class F, class = typename std::enable_if_t<has_iterator_interface<F>::value>>
    inline auto in1d(E&& element, F&& test_elements) noexcept
    {
        // in1d is the 1-D restriction of isin: both operands must be rank-1.
        XTENSOR_ASSERT(element.dimension() == 1ul);
        XTENSOR_ASSERT(test_elements.dimension() == 1ul);
        return isin(std::forward<E>(element), std::forward<F>(test_elements));
    }
+
    /**
     * @ingroup logical_operators
     * @brief in1d
     *
     * Returns a boolean array of the same shape as ``element`` that is ``true`` where an element of
     * ``element`` is in ``test_elements`` and ``False`` otherwise.
     * @param element an \ref xexpression
     * @param test_elements_begin iterator to the beginning of an array
     * @param test_elements_end iterator to the end of an array
     * @return a boolean array
     */
    template <class E, class I, class = typename std::enable_if_t<is_iterator<I>::value>>
    inline auto in1d(E&& element, I&& test_elements_begin, I&& test_elements_end) noexcept
    {
        // in1d is the 1-D restriction of isin.
        XTENSOR_ASSERT(element.dimension() == 1ul);
        return isin(
            std::forward<E>(element),
            std::forward<I>(test_elements_begin),
            std::forward<I>(test_elements_end)
        );
    }
+
+    /**
+     * @ingroup searchsorted
+     * @brief Find indices where elements should be inserted to maintain order.
+     *
+     * @param a Input array: sorted (array_like).
+     * @param v Values to insert into a (array_like).
+     * @param right If ``false``, the index of the first suitable location found is given.
+     * @return Array of insertion points with the same shape as v.
+     */
+    template <class E1, class E2>
+    inline auto searchsorted(E1&& a, E2&& v, bool right = true)
+    {
+        XTENSOR_ASSERT(std::is_sorted(a.cbegin(), a.cend()));
+
+        auto out = xt::empty<size_t>(v.shape());
+
+        if (right)
+        {
+            for (size_t i = 0; i < v.size(); ++i)
+            {
+                out(i) = static_cast<std::size_t>(std::lower_bound(a.cbegin(), a.cend(), v(i)) - a.cbegin());
+            }
+        }
+        else
+        {
+            for (size_t i = 0; i < v.size(); ++i)
+            {
+                out(i) = static_cast<std::size_t>(std::upper_bound(a.cbegin(), a.cend(), v(i)) - a.cbegin());
+            }
+        }
+
+
+        return out;
+    }
+
+}
+
+#endif

+ 578 - 0
3rd/numpy/include/xtensor/xshape.hpp

@@ -0,0 +1,578 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_XSHAPE_HPP
+#define XTENSOR_XSHAPE_HPP
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <initializer_list>
+#include <iterator>
+#include <memory>
+
+#include "xlayout.hpp"
+#include "xstorage.hpp"
+#include "xtensor_forward.hpp"
+
namespace xt
{
    // Default run-time shape container: small-buffer vector of extents.
    template <class T>
    using dynamic_shape = svector<T, 4>;

    // Shape whose rank is fixed at compile time.
    template <class T, std::size_t N>
    using static_shape = std::array<T, N>;

    // Shape whose rank and extents are both fixed at compile time.
    template <std::size_t... X>
    class fixed_shape;

    // Multi-dimensional index type.
    using xindex = dynamic_shape<std::size_t>;

    // Forward declarations; definitions appear below in this header.
    template <class S1, class S2>
    bool same_shape(const S1& s1, const S2& s2) noexcept;

    template <class U>
    struct initializer_dimension;

    template <class R, class T>
    constexpr R shape(T t);

    template <class R = std::size_t, class T, std::size_t N>
    xt::static_shape<R, N> shape(const T (&aList)[N]);

    template <class S>
    struct static_dimension;

    template <layout_type L, class S>
    struct select_layout;

    template <class... S>
    struct promote_shape;

    template <class... S>
    struct promote_strides;

    template <class S>
    struct index_from_shape;
}
+
namespace xtl
{
    namespace detail
    {
        template <class S>
        struct sequence_builder;

        // Specialization for xt::fixed_shape: the extents are encoded in the
        // type itself, so the requested size/value arguments are ignored.
        template <std::size_t... I>
        struct sequence_builder<xt::fixed_shape<I...>>
        {
            using sequence_type = xt::fixed_shape<I...>;
            using value_type = typename sequence_type::value_type;

            inline static sequence_type make(std::size_t /*size*/)
            {
                return sequence_type{};
            }

            inline static sequence_type make(std::size_t /*size*/, value_type /*v*/)
            {
                return sequence_type{};
            }
        };
    }
}
+
+namespace xt
+{
+    /**
+     * @defgroup xt_xshape Support functions to get/check a shape array.
+     */
+
+    /**************
+     * same_shape *
+     **************/
+
+    /**
+     * Check if two objects have the same shape.
+     *
+     * @ingroup xt_xshape
+     * @param s1 an array
+     * @param s2 an array
+     * @return bool
+     */
+    template <class S1, class S2>
+    inline bool same_shape(const S1& s1, const S2& s2) noexcept
+    {
+        return s1.size() == s2.size() && std::equal(s1.begin(), s1.end(), s2.begin());
+    }
+
+    /*************
+     * has_shape *
+     *************/
+
+    /**
+     * Check if an object has a certain shape.
+     *
+     * @ingroup xt_xshape
+     * @param a an array
+     * @param shape the shape to test
+     * @return bool
+     */
+    template <class E, class S>
+    inline bool has_shape(const E& e, std::initializer_list<S> shape) noexcept
+    {
+        return e.shape().size() == shape.size()
+               && std::equal(e.shape().cbegin(), e.shape().cend(), shape.begin());
+    }
+
    /**
     * Check if an object has a certain shape.
     *
     * @ingroup xt_xshape
     * @param e an array
     * @param shape the shape to test
     * @return bool
     */
    template <class E, class S, class = typename std::enable_if_t<has_iterator_interface<S>::value>>
    inline bool has_shape(const E& e, const S& shape)
    {
        return e.shape().size() == shape.size()
               && std::equal(e.shape().cbegin(), e.shape().cend(), shape.begin());
    }
+
+    /*************************
+     * initializer_dimension *
+     *************************/
+
    namespace detail
    {
        // Counts the nesting depth of a std::initializer_list type:
        // 0 for a non-list type, 1 + depth of the element type otherwise.
        template <class U>
        struct initializer_depth_impl
        {
            static constexpr std::size_t value = 0;
        };

        template <class T>
        struct initializer_depth_impl<std::initializer_list<T>>
        {
            static constexpr std::size_t value = 1 + initializer_depth_impl<T>::value;
        };
    }

    // Number of dimensions represented by a nested initializer list type U.
    template <class U>
    struct initializer_dimension
    {
        static constexpr std::size_t value = detail::initializer_depth_impl<U>::value;
    };
+
+    /*********************
+     * initializer_shape *
+     *********************/
+
    namespace detail
    {
        // Extent of the I-th dimension of a nested initializer list:
        // recurse into the first sub-list I times, then take its size.
        template <std::size_t I>
        struct initializer_shape_impl
        {
            template <class T>
            static constexpr std::size_t value(T t)
            {
                // An empty list has no first element to recurse into.
                return t.size() == 0 ? 0 : initializer_shape_impl<I - 1>::value(*t.begin());
            }
        };

        template <>
        struct initializer_shape_impl<0>
        {
            template <class T>
            static constexpr std::size_t value(T t)
            {
                return t.size();
            }
        };

        // Builds the shape container R from one extent per dimension index I.
        template <class R, class U, std::size_t... I>
        constexpr R initializer_shape(U t, std::index_sequence<I...>)
        {
            using size_type = typename R::value_type;
            return {size_type(initializer_shape_impl<I>::value(t))...};
        }
    }
+
    // Computes the shape of the nested initializer list t as a container of
    // type R, with one entry per nesting level.
    template <class R, class T>
    constexpr R shape(T t)
    {
        return detail::initializer_shape<R, decltype(t)>(
            t,
            std::make_index_sequence<initializer_dimension<decltype(t)>::value>()
        );
    }
+
+    /** @brief Generate an xt::static_shape of the given size. */
+    template <class R, class T, std::size_t N>
+    xt::static_shape<R, N> shape(const T (&list)[N])
+    {
+        xt::static_shape<R, N> shape;
+        std::copy(std::begin(list), std::end(list), std::begin(shape));
+        return shape;
+    }
+
+    /********************
+     * static_dimension *
+     ********************/
+
    namespace detail
    {
        // -1 marks a shape type whose rank is not known at compile time.
        template <class T, class E = void>
        struct static_dimension_impl
        {
            static constexpr std::ptrdiff_t value = -1;
        };

        // Types modeling std::tuple_size (e.g. std::array) expose their
        // rank statically.
        template <class T>
        struct static_dimension_impl<T, void_t<decltype(std::tuple_size<T>::value)>>
        {
            static constexpr std::ptrdiff_t value = static_cast<std::ptrdiff_t>(std::tuple_size<T>::value);
        };
    }

    // Compile-time rank of the shape type S, or -1 when it is dynamic.
    template <class S>
    struct static_dimension
    {
        static constexpr std::ptrdiff_t value = detail::static_dimension_impl<S>::value;
    };
+
    /**
     * Compute a layout based on a layout and a shape type.
     *
     * The main functionality of this function is that it reduces vectors to
     * ``xt::layout_type::any`` so that assigning a row major 1D container to another
     * row_major container becomes free.
     *
     * @ingroup xt_xshape
     */
    template <layout_type L, class S>
    struct select_layout
    {
        static constexpr std::ptrdiff_t static_dimension = xt::static_dimension<S>::value;
        // Rank 0 and rank 1 containers are layout-agnostic, provided the
        // requested layout L is not dynamic.
        static constexpr bool is_any = static_dimension != -1 && static_dimension <= 1
                                       && L != layout_type::dynamic;
        static constexpr layout_type value = is_any ? layout_type::any : L;
    };
+
+    /*************************************
+     * promote_shape and promote_strides *
+     *************************************/
+
+    namespace detail
+    {
+        template <class T1, class T2>
+        constexpr std::common_type_t<T1, T2> imax(const T1& a, const T2& b)
+        {
+            return a > b ? a : b;
+        }
+
        // Variadic meta-function returning the maximal size of std::arrays.
        template <class... T>
        struct max_array_size;

        // Empty pack: the maximum defaults to 0.
        template <>
        struct max_array_size<>
        {
            static constexpr std::size_t value = 0;
        };

        // Fold over the pack, keeping the largest tuple_size.
        template <class T, class... Ts>
        struct max_array_size<T, Ts...>
            : std::integral_constant<std::size_t, imax(std::tuple_size<T>::value, max_array_size<Ts...>::value)>
        {
        };
+
        // Broadcasting for fixed shapes: compile-time access to the IDX-th
        // element of the pack X..., yielding 0 when IDX is out of bounds.
        template <std::size_t IDX, std::size_t... X>
        struct at
        {
            static constexpr std::size_t arr[sizeof...(X)] = {X...};
            static constexpr std::size_t value = (IDX < sizeof...(X)) ? arr[IDX] : 0;
        };
+
        template <class S1, class S2>
        struct broadcast_fixed_shape;

        template <class IX, class A, class B>
        struct broadcast_fixed_shape_impl;

        template <std::size_t IX, class A, class B>
        struct broadcast_fixed_shape_cmp_impl;

        // Compares one dimension (index JX into the longer pack J) of the two
        // shapes and computes the broadcast extent ("ordinate") for it.
        template <std::size_t JX, std::size_t... I, std::size_t... J>
        struct broadcast_fixed_shape_cmp_impl<JX, fixed_shape<I...>, fixed_shape<J...>>
        {
            // We line the shapes up from the last index
            // IX may underflow, thus being a very large number
            static constexpr std::size_t IX = JX - (sizeof...(J) - sizeof...(I));

            // Out of bounds access gives value 0
            static constexpr std::size_t I_v = at<IX, I...>::value;
            static constexpr std::size_t J_v = at<JX, J...>::value;

            // we're statically checking if the broadcast shapes are either one on either of them or equal
            static_assert(!I_v || I_v == 1 || J_v == 1 || J_v == I_v, "broadcast shapes do not match.");

            // The broadcast extent is the larger of the two.
            static constexpr std::size_t ordinate = (I_v > J_v) ? I_v : J_v;
            static constexpr bool value = (I_v == J_v);
        };
+
+        template <std::size_t... JX, std::size_t... I, std::size_t... J>
+        struct broadcast_fixed_shape_impl<std::index_sequence<JX...>, fixed_shape<I...>, fixed_shape<J...>>
+        {
+            static_assert(sizeof...(J) >= sizeof...(I), "broadcast shapes do not match.");
+
+            using type = xt::fixed_shape<
+                broadcast_fixed_shape_cmp_impl<JX, fixed_shape<I...>, fixed_shape<J...>>::ordinate...>;
+            static constexpr bool value = xtl::conjunction<
+                broadcast_fixed_shape_cmp_impl<JX, fixed_shape<I...>, fixed_shape<J...>>...>::value;
+        };
+
+        /* broadcast_fixed_shape<fixed_shape<I...>, fixed_shape<J...>>
+         * Just like a call to broadcast_shape(const S1& input, S2& output),
+         * except that the result shape is aliased as type, and the returned
+         * bool is the member value. Asserts on an illegal broadcast, including
+         * the case where pack I is strictly longer than pack J. */
+
+        template <std::size_t... I, std::size_t... J>
+        struct broadcast_fixed_shape<fixed_shape<I...>, fixed_shape<J...>>
+            : broadcast_fixed_shape_impl<std::make_index_sequence<sizeof...(J)>, fixed_shape<I...>, fixed_shape<J...>>
+        {
+        };
+
+        // Simple is_array and only_array meta-functions
+        template <class S>
+        struct is_array
+        {
+            static constexpr bool value = false;
+        };
+
+        template <class T, std::size_t N>
+        struct is_array<std::array<T, N>>
+        {
+            static constexpr bool value = true;
+        };
+
+        template <class S>
+        struct is_fixed : std::false_type
+        {
+        };
+
+        template <std::size_t... N>
+        struct is_fixed<fixed_shape<N...>> : std::true_type
+        {
+        };
+
+        template <class S>
+        struct is_scalar_shape
+        {
+            static constexpr bool value = false;
+        };
+
+        template <class T>
+        struct is_scalar_shape<std::array<T, 0>>
+        {
+            static constexpr bool value = true;
+        };
+
+        template <class... S>
+        using only_array = xtl::conjunction<xtl::disjunction<is_array<S>, is_fixed<S>>...>;
+
+        // Test that at least one argument is a fixed shape. If so, every argument has to be either
+        // fixed or scalar.
+        template <class... S>
+        using only_fixed = std::integral_constant<
+            bool,
+            xtl::disjunction<is_fixed<S>...>::value
+                && xtl::conjunction<xtl::disjunction<is_fixed<S>, is_scalar_shape<S>>...>::value>;
+
+        template <class... S>
+        using all_fixed = xtl::conjunction<is_fixed<S>...>;
+
+        // The promote_index meta-function returns std::vector<promoted_value_type> in the
+        // general case and an array of the promoted value type and maximal size if all
+        // arguments are of type std::array
+
+        template <class... S>
+        struct promote_array
+        {
+            using type = std::
+                array<typename std::common_type<typename S::value_type...>::type, max_array_size<S...>::value>;
+        };
+
+        template <>
+        struct promote_array<>
+        {
+            using type = std::array<std::size_t, 0>;
+        };
+
+        template <class S>
+        struct filter_scalar
+        {
+            using type = S;
+        };
+
+        template <class T>
+        struct filter_scalar<std::array<T, 0>>
+        {
+            using type = fixed_shape<1>;
+        };
+
+        template <class S>
+        using filter_scalar_t = typename filter_scalar<S>::type;
+
+        template <class... S>
+        struct promote_fixed : promote_fixed<filter_scalar_t<S>...>
+        {
+        };
+
+        template <std::size_t... I>
+        struct promote_fixed<fixed_shape<I...>>
+        {
+            using type = fixed_shape<I...>;
+            static constexpr bool value = true;
+        };
+
+        template <std::size_t... I, std::size_t... J, class... S>
+        struct promote_fixed<fixed_shape<I...>, fixed_shape<J...>, S...>
+        {
+        private:
+
+            using intermediate = std::conditional_t<
+                (sizeof...(I) > sizeof...(J)),
+                broadcast_fixed_shape<fixed_shape<J...>, fixed_shape<I...>>,
+                broadcast_fixed_shape<fixed_shape<I...>, fixed_shape<J...>>>;
+            using result = promote_fixed<typename intermediate::type, S...>;
+
+        public:
+
+            using type = typename result::type;
+            static constexpr bool value = xtl::conjunction<intermediate, result>::value;
+        };
+
+        template <bool all_index, bool all_array, class... S>
+        struct select_promote_index;
+
+        template <class... S>
+        struct select_promote_index<true, true, S...> : promote_fixed<S...>
+        {
+        };
+
+        template <>
+        struct select_promote_index<true, true>
+        {
+            // todo correct? used in xvectorize
+            using type = dynamic_shape<std::size_t>;
+        };
+
+        template <class... S>
+        struct select_promote_index<false, true, S...> : promote_array<S...>
+        {
+        };
+
+        template <class... S>
+        struct select_promote_index<false, false, S...>
+        {
+            using type = dynamic_shape<typename std::common_type<typename S::value_type...>::type>;
+        };
+
+        template <class... S>
+        struct promote_index : select_promote_index<only_fixed<S...>::value, only_array<S...>::value, S...>
+        {
+        };
+
+        template <class T>
+        struct index_from_shape_impl
+        {
+            using type = T;
+        };
+
+        template <std::size_t... N>
+        struct index_from_shape_impl<fixed_shape<N...>>
+        {
+            using type = std::array<std::size_t, sizeof...(N)>;
+        };
+    }
+
+    template <class... S>
+    struct promote_shape
+    {
+        using type = typename detail::promote_index<S...>::type;
+    };
+
+    /**
+     * @ingroup xt_xshape
+     */
+    template <class... S>
+    using promote_shape_t = typename promote_shape<S...>::type;
+
+    template <class... S>
+    struct promote_strides
+    {
+        using type = typename detail::promote_index<S...>::type;
+    };
+
+    /**
+     * @ingroup xt_xshape
+     */
+    template <class... S>
+    using promote_strides_t = typename promote_strides<S...>::type;
+
+    template <class S>
+    struct index_from_shape
+    {
+        using type = typename detail::index_from_shape_impl<S>::type;
+    };
+
+    /**
+     * @ingroup xt_xshape
+     */
+    template <class S>
+    using index_from_shape_t = typename index_from_shape<S>::type;
+
+    /**********************
+     * filter_fixed_shape *
+     **********************/
+
+    namespace detail
+    {
+        template <class S>
+        struct filter_fixed_shape_impl
+        {
+            using type = S;
+        };
+
+        template <std::size_t... N>
+        struct filter_fixed_shape_impl<fixed_shape<N...>>
+        {
+            using type = std::array<std::size_t, sizeof...(N)>;
+        };
+    }
+
+    template <class S>
+    struct filter_fixed_shape : detail::filter_fixed_shape_impl<S>
+    {
+    };
+
+    /**
+     * @ingroup xt_xshape
+     */
+    template <class S>
+    using filter_fixed_shape_t = typename filter_fixed_shape<S>::type;
+}
+
+#endif

+ 1671 - 0
3rd/numpy/include/xtensor/xslice.hpp

@@ -0,0 +1,1671 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_SLICE_HPP
+#define XTENSOR_SLICE_HPP
+
+#include <cstddef>
+#include <map>
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xtype_traits.hpp>
+
+#include "xstorage.hpp"
+#include "xtensor_config.hpp"
+#include "xutils.hpp"
+
+#ifndef XTENSOR_CONSTEXPR
+#if (defined(_MSC_VER) || __GNUC__ < 8)
+#define XTENSOR_CONSTEXPR inline
+#define XTENSOR_GLOBAL_CONSTEXPR static const
+#else
+#define XTENSOR_CONSTEXPR constexpr
+#define XTENSOR_GLOBAL_CONSTEXPR constexpr
+#endif
+#endif
+
+namespace xt
+{
+
+    /**********************
+     * xslice declaration *
+     **********************/
+
+    template <class D>
+    class xslice
+    {
+    public:
+
+        using derived_type = D;
+
+        derived_type& derived_cast() noexcept;
+        const derived_type& derived_cast() const noexcept;
+
+    protected:
+
+        xslice() = default;
+        ~xslice() = default;
+
+        xslice(const xslice&) = default;
+        xslice& operator=(const xslice&) = default;
+
+        xslice(xslice&&) = default;
+        xslice& operator=(xslice&&) = default;
+    };
+
+    template <class S>
+    using is_xslice = std::is_base_of<xslice<S>, S>;
+
+    template <class E, class R = void>
+    using disable_xslice = typename std::enable_if<!is_xslice<E>::value, R>::type;
+
+    template <class... E>
+    using has_xslice = xtl::disjunction<is_xslice<E>...>;
+
+    /**************
+     * slice tags *
+     **************/
+
+#define DEFINE_TAG_CONVERSION(NAME)                 \
+    template <class T>                              \
+    XTENSOR_CONSTEXPR NAME convert() const noexcept \
+    {                                               \
+        return NAME();                              \
+    }
+
+    struct xall_tag
+    {
+        DEFINE_TAG_CONVERSION(xall_tag)
+    };
+
+    struct xnewaxis_tag
+    {
+        DEFINE_TAG_CONVERSION(xnewaxis_tag)
+    };
+
+    struct xellipsis_tag
+    {
+        DEFINE_TAG_CONVERSION(xellipsis_tag)
+    };
+
+#undef DEFINE_TAG_CONVERSION
+
+    /**********************
+     * xrange declaration *
+     **********************/
+
+    template <class T>
+    class xrange : public xslice<xrange<T>>
+    {
+    public:
+
+        using size_type = T;
+        using self_type = xrange<T>;
+
+        xrange() = default;
+        xrange(size_type start_val, size_type stop_val) noexcept;
+
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        operator xrange<S>() const noexcept;
+
+        // Same as implicit conversion operator but more convenient to call
+        // from a variant visitor
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        xrange<S> convert() const noexcept;
+
+        size_type operator()(size_type i) const noexcept;
+
+        size_type size() const noexcept;
+        size_type step_size() const noexcept;
+        size_type step_size(std::size_t i, std::size_t n = 1) const noexcept;
+        size_type revert_index(std::size_t i) const noexcept;
+
+        bool contains(size_type i) const noexcept;
+
+        bool operator==(const self_type& rhs) const noexcept;
+        bool operator!=(const self_type& rhs) const noexcept;
+
+    private:
+
+        size_type m_start;
+        size_type m_size;
+
+        template <class S>
+        friend class xrange;
+    };
+
+    /******************************
+     * xstepped_range declaration *
+     ******************************/
+
+    template <class T>
+    class xstepped_range : public xslice<xstepped_range<T>>
+    {
+    public:
+
+        using size_type = T;
+        using self_type = xstepped_range<T>;
+
+        xstepped_range() = default;
+        xstepped_range(size_type start_val, size_type stop_val, size_type step) noexcept;
+
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        operator xstepped_range<S>() const noexcept;
+
+        // Same as implicit conversion operator but more convenient to call
+        // from a variant visitor
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        xstepped_range<S> convert() const noexcept;
+
+        size_type operator()(size_type i) const noexcept;
+
+        size_type size() const noexcept;
+        size_type step_size() const noexcept;
+        size_type step_size(std::size_t i, std::size_t n = 1) const noexcept;
+        size_type revert_index(std::size_t i) const noexcept;
+
+        bool contains(size_type i) const noexcept;
+
+        bool operator==(const self_type& rhs) const noexcept;
+        bool operator!=(const self_type& rhs) const noexcept;
+
+    private:
+
+        size_type m_start;
+        size_type m_size;
+        size_type m_step;
+
+        template <class S>
+        friend class xstepped_range;
+    };
+
+    /********************
+     * xall declaration *
+     ********************/
+
+    template <class T>
+    class xall : public xslice<xall<T>>
+    {
+    public:
+
+        using size_type = T;
+        using self_type = xall<T>;
+
+        xall() = default;
+        explicit xall(size_type size) noexcept;
+
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        operator xall<S>() const noexcept;
+
+        // Same as implicit conversion operator but more convenient to call
+        // from a variant visitor
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        xall<S> convert() const noexcept;
+
+        size_type operator()(size_type i) const noexcept;
+
+        size_type size() const noexcept;
+        size_type step_size() const noexcept;
+        size_type step_size(std::size_t i, std::size_t n = 1) const noexcept;
+        size_type revert_index(std::size_t i) const noexcept;
+
+        bool contains(size_type i) const noexcept;
+
+        bool operator==(const self_type& rhs) const noexcept;
+        bool operator!=(const self_type& rhs) const noexcept;
+
+    private:
+
+        size_type m_size;
+    };
+
+    /**
+     * Returns a slice representing a full dimension,
+     * to be used as an argument of view function.
+     * @sa view, strided_view
+     */
+    inline auto all() noexcept
+    {
+        return xall_tag();
+    }
+
+    /**
+     * Returns a slice representing all remaining dimensions,
+     * and selecting all in these dimensions. Ellipsis will expand
+     * to a series of `all()` slices, until the number of slices is
+     * equal to the number of dimensions of the source array.
+     *
+     * Note: ellipsis can only be used in strided_view!
+     *
+     * @code{.cpp}
+     * xarray<double> a = xarray<double>::from_shape({5, 5, 1, 1, 5});
+     * auto v = xt::strided_view(a, {2, xt::ellipsis(), 2});
+     * // equivalent to using {2, xt::all(), xt::all(), xt::all(), 2};
+     * @endcode
+     *
+     * @sa strided_view
+     */
+    inline auto ellipsis() noexcept
+    {
+        return xellipsis_tag();
+    }
+
+    /************************
+     * xnewaxis declaration *
+     ************************/
+
+    template <class T>
+    class xnewaxis : public xslice<xnewaxis<T>>
+    {
+    public:
+
+        using size_type = T;
+        using self_type = xnewaxis<T>;
+
+        xnewaxis() = default;
+
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        operator xnewaxis<S>() const noexcept;
+
+        // Same as implicit conversion operator but more convenient to call
+        // from a variant visitor
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        xnewaxis<S> convert() const noexcept;
+
+        size_type operator()(size_type i) const noexcept;
+
+        size_type size() const noexcept;
+        size_type step_size() const noexcept;
+        size_type step_size(std::size_t i, std::size_t n = 1) const noexcept;
+        size_type revert_index(std::size_t i) const noexcept;
+
+        bool contains(size_type i) const noexcept;
+
+        bool operator==(const self_type& rhs) const noexcept;
+        bool operator!=(const self_type& rhs) const noexcept;
+    };
+
+    /**
+     * Returns a slice representing a new axis of length one,
+     * to be used as an argument of view function.
+     * @sa view, strided_view
+     */
+    inline auto newaxis() noexcept
+    {
+        return xnewaxis_tag();
+    }
+
+    /***************************
+     * xkeep_slice declaration *
+     ***************************/
+
+    template <class T>
+    class xkeep_slice;
+
+    namespace detail
+    {
+        template <class T>
+        struct is_xkeep_slice : std::false_type
+        {
+        };
+
+        template <class T>
+        struct is_xkeep_slice<xkeep_slice<T>> : std::true_type
+        {
+        };
+
+        template <class T>
+        using disable_xkeep_slice_t = std::enable_if_t<!is_xkeep_slice<std::decay_t<T>>::value, void>;
+
+        template <class T>
+        using enable_xkeep_slice_t = std::enable_if_t<is_xkeep_slice<std::decay_t<T>>::value, void>;
+    }
+
+    template <class T>
+    class xkeep_slice : public xslice<xkeep_slice<T>>
+    {
+    public:
+
+        using container_type = svector<T>;
+        using size_type = typename container_type::value_type;
+        using self_type = xkeep_slice<T>;
+
+        template <class C, typename = detail::disable_xkeep_slice_t<C>>
+        explicit xkeep_slice(C& cont);
+        explicit xkeep_slice(container_type&& cont);
+
+        template <class S>
+        xkeep_slice(std::initializer_list<S> t);
+
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        operator xkeep_slice<S>() const noexcept;
+
+        // Same as implicit conversion operator but more convenient to call
+        // from a variant visitor
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        xkeep_slice<S> convert() const noexcept;
+
+        size_type operator()(size_type i) const noexcept;
+        size_type size() const noexcept;
+
+        void normalize(std::size_t s);
+
+        size_type step_size(std::size_t i, std::size_t n = 1) const noexcept;
+        size_type revert_index(std::size_t i) const;
+
+        bool contains(size_type i) const noexcept;
+
+        bool operator==(const self_type& rhs) const noexcept;
+        bool operator!=(const self_type& rhs) const noexcept;
+
+    private:
+
+        xkeep_slice() = default;
+
+        container_type m_indices;
+        container_type m_raw_indices;
+
+        template <class S>
+        friend class xkeep_slice;
+    };
+
+    namespace detail
+    {
+        template <class T>
+        using disable_integral_keep = std::enable_if_t<
+            !xtl::is_integral<std::decay_t<T>>::value,
+            xkeep_slice<typename std::decay_t<T>::value_type>>;
+
+        template <class T, class R>
+        using enable_integral_keep = std::enable_if_t<xtl::is_integral<T>::value, xkeep_slice<R>>;
+    }
+
+    /**
+     * Create a non-contiguous slice from a container of indices to keep.
+     * Note: this slice cannot be used in the xstrided_view!
+     *
+     * @code{.cpp}
+     * xt::xarray<double> a = xt::arange(9);
+     * a.reshape({3, 3});
+     * xt::view(a, xt::keep(0, 2)); // => {{0, 1, 2}, {6, 7, 8}}
+     * xt::view(a, xt::keep(1, 1, 1)); // => {{3, 4, 5}, {3, 4, 5}, {3, 4, 5}}
+     * @endcode
+     *
+     * @param indices The indices container
+     * @return instance of xkeep_slice
+     */
+    template <class T>
+    inline detail::disable_integral_keep<T> keep(T&& indices)
+    {
+        return xkeep_slice<typename std::decay_t<T>::value_type>(std::forward<T>(indices));
+    }
+
+    template <class R = std::ptrdiff_t, class T>
+    inline detail::enable_integral_keep<T, R> keep(T i)
+    {
+        using slice_type = xkeep_slice<R>;
+        using container_type = typename slice_type::container_type;
+        container_type tmp = {static_cast<R>(i)};
+        return slice_type(std::move(tmp));
+    }
+
+    template <class R = std::ptrdiff_t, class Arg0, class Arg1, class... Args>
+    inline xkeep_slice<R> keep(Arg0 i0, Arg1 i1, Args... args)
+    {
+        using slice_type = xkeep_slice<R>;
+        using container_type = typename slice_type::container_type;
+        container_type tmp = {static_cast<R>(i0), static_cast<R>(i1), static_cast<R>(args)...};
+        return slice_type(std::move(tmp));
+    }
+
+    /***************************
+     * xdrop_slice declaration *
+     ***************************/
+
+    template <class T>
+    class xdrop_slice;
+
+    namespace detail
+    {
+        template <class T>
+        struct is_xdrop_slice : std::false_type
+        {
+        };
+
+        template <class T>
+        struct is_xdrop_slice<xdrop_slice<T>> : std::true_type
+        {
+        };
+
+        template <class T>
+        using disable_xdrop_slice_t = std::enable_if_t<!is_xdrop_slice<std::decay_t<T>>::value, void>;
+
+        template <class T>
+        using enable_xdrop_slice_t = std::enable_if_t<is_xdrop_slice<std::decay_t<T>>::value, void>;
+    }
+
+    template <class T>
+    class xdrop_slice : public xslice<xdrop_slice<T>>
+    {
+    public:
+
+        using container_type = svector<T>;
+        using size_type = typename container_type::value_type;
+        using self_type = xdrop_slice<T>;
+
+        template <class C, typename = detail::disable_xdrop_slice_t<C>>
+        explicit xdrop_slice(C& cont);
+        explicit xdrop_slice(container_type&& cont);
+
+        template <class S>
+        xdrop_slice(std::initializer_list<S> t);
+
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        operator xdrop_slice<S>() const noexcept;
+
+        // Same as implicit conversion operator but more convenient to call
+        // from a variant visitor
+        template <class S, typename = std::enable_if_t<std::is_convertible<S, T>::value, void>>
+        xdrop_slice<S> convert() const noexcept;
+
+        size_type operator()(size_type i) const noexcept;
+        size_type size() const noexcept;
+
+        void normalize(std::size_t s);
+
+        size_type step_size(std::size_t i, std::size_t n = 1) const noexcept;
+        size_type revert_index(std::size_t i) const;
+
+        bool contains(size_type i) const noexcept;
+
+        bool operator==(const self_type& rhs) const noexcept;
+        bool operator!=(const self_type& rhs) const noexcept;
+
+    private:
+
+        xdrop_slice() = default;
+
+        container_type m_indices;
+        container_type m_raw_indices;
+        std::map<size_type, size_type> m_inc;
+        size_type m_size;
+
+        template <class S>
+        friend class xdrop_slice;
+    };
+
+    namespace detail
+    {
+        template <class T>
+        using disable_integral_drop = std::enable_if_t<
+            !xtl::is_integral<std::decay_t<T>>::value,
+            xdrop_slice<typename std::decay_t<T>::value_type>>;
+
+        template <class T, class R>
+        using enable_integral_drop = std::enable_if_t<xtl::is_integral<T>::value, xdrop_slice<R>>;
+    }
+
+    /**
+     * Create a non-contiguous slice from a container of indices to drop.
+     * Note: this slice cannot be used in the xstrided_view!
+     *
+     * @code{.cpp}
+     * xt::xarray<double> a = xt::arange(9);
+     * a.reshape({3, 3});
+     * xt::view(a, xt::drop(0, 2)); // => {{3, 4, 5}}
+     * @endcode
+     *
+     * @param indices The container of indices to drop
+     * @return instance of xdrop_slice
+     */
+    template <class T>
+    inline detail::disable_integral_drop<T> drop(T&& indices)
+    {
+        return xdrop_slice<typename std::decay_t<T>::value_type>(std::forward<T>(indices));
+    }
+
+    template <class R = std::ptrdiff_t, class T>
+    inline detail::enable_integral_drop<T, R> drop(T i)
+    {
+        using slice_type = xdrop_slice<R>;
+        using container_type = typename slice_type::container_type;
+        container_type tmp = {static_cast<R>(i)};
+        return slice_type(std::move(tmp));
+    }
+
+    template <class R = std::ptrdiff_t, class Arg0, class Arg1, class... Args>
+    inline xdrop_slice<R> drop(Arg0 i0, Arg1 i1, Args... args)
+    {
+        using slice_type = xdrop_slice<R>;
+        using container_type = typename slice_type::container_type;
+        container_type tmp = {static_cast<R>(i0), static_cast<R>(i1), static_cast<R>(args)...};
+        return slice_type(std::move(tmp));
+    }
+
+    /******************************
+     * xrange_adaptor declaration *
+     ******************************/
+
+    template <class A, class B = A, class C = A>
+    struct xrange_adaptor
+    {
+        xrange_adaptor(A start_val, B stop_val, C step)
+            : m_start(start_val)
+            , m_stop(stop_val)
+            , m_step(step)
+        {
+        }
+
+        template <class MI = A, class MA = B, class STEP = C>
+        inline std::enable_if_t<
+            xtl::is_integral<MI>::value && xtl::is_integral<MA>::value && xtl::is_integral<STEP>::value,
+            xstepped_range<std::ptrdiff_t>>
+        get(std::size_t size) const
+        {
+            return get_stepped_range(m_start, m_stop, m_step, size);
+        }
+
+        template <class MI = A, class MA = B, class STEP = C>
+        inline std::enable_if_t<
+            !xtl::is_integral<MI>::value && xtl::is_integral<MA>::value && xtl::is_integral<STEP>::value,
+            xstepped_range<std::ptrdiff_t>>
+        get(std::size_t size) const
+        {
+            return get_stepped_range(m_step > 0 ? 0 : static_cast<std::ptrdiff_t>(size) - 1, m_stop, m_step, size);
+        }
+
+        template <class MI = A, class MA = B, class STEP = C>
+        inline std::enable_if_t<
+            xtl::is_integral<MI>::value && !xtl::is_integral<MA>::value && xtl::is_integral<STEP>::value,
+            xstepped_range<std::ptrdiff_t>>
+        get(std::size_t size) const
+        {
+            auto sz = static_cast<std::ptrdiff_t>(size);
+            return get_stepped_range(m_start, m_step > 0 ? sz : -(sz + 1), m_step, size);
+        }
+
+        template <class MI = A, class MA = B, class STEP = C>
+        inline std::enable_if_t<
+            xtl::is_integral<MI>::value && xtl::is_integral<MA>::value && !xtl::is_integral<STEP>::value,
+            xrange<std::ptrdiff_t>>
+        get(std::size_t size) const
+        {
+            return xrange<std::ptrdiff_t>(normalize(m_start, size), normalize(m_stop, size));
+        }
+
+        template <class MI = A, class MA = B, class STEP = C>
+        inline std::enable_if_t<
+            !xtl::is_integral<MI>::value && !xtl::is_integral<MA>::value && xtl::is_integral<STEP>::value,
+            xstepped_range<std::ptrdiff_t>>
+        get(std::size_t size) const
+        {
+            std::ptrdiff_t start = m_step >= 0 ? 0 : static_cast<std::ptrdiff_t>(size) - 1;
+            std::ptrdiff_t stop = m_step >= 0 ? static_cast<std::ptrdiff_t>(size) : -1;
+            return xstepped_range<std::ptrdiff_t>(start, stop, m_step);
+        }
+
+        template <class MI = A, class MA = B, class STEP = C>
+        inline std::enable_if_t<
+            xtl::is_integral<MI>::value && !xtl::is_integral<MA>::value && !xtl::is_integral<STEP>::value,
+            xrange<std::ptrdiff_t>>
+        get(std::size_t size) const
+        {
+            return xrange<std::ptrdiff_t>(normalize(m_start, size), static_cast<std::ptrdiff_t>(size));
+        }
+
+        template <class MI = A, class MA = B, class STEP = C>
+        inline std::enable_if_t<
+            !xtl::is_integral<MI>::value && xtl::is_integral<MA>::value && !xtl::is_integral<STEP>::value,
+            xrange<std::ptrdiff_t>>
+        get(std::size_t size) const
+        {
+            return xrange<std::ptrdiff_t>(0, normalize(m_stop, size));
+        }
+
+        template <class MI = A, class MA = B, class STEP = C>
+        inline std::enable_if_t<
+            !xtl::is_integral<MI>::value && !xtl::is_integral<MA>::value && !xtl::is_integral<STEP>::value,
+            xall<std::ptrdiff_t>>
+        get(std::size_t size) const
+        {
+            return xall<std::ptrdiff_t>(static_cast<std::ptrdiff_t>(size));
+        }
+
+        A start() const
+        {
+            return m_start;
+        }
+
+        B stop() const
+        {
+            return m_stop;
+        }
+
+        C step() const
+        {
+            return m_step;
+        }
+
+    private:
+
+        static auto normalize(std::ptrdiff_t val, std::size_t ssize)
+        {
+            std::ptrdiff_t size = static_cast<std::ptrdiff_t>(ssize);
+            val = (val >= 0) ? val : val + size;
+            return (std::max)(std::ptrdiff_t(0), (std::min)(size, val));
+        }
+
+        static auto
+        get_stepped_range(std::ptrdiff_t start, std::ptrdiff_t stop, std::ptrdiff_t step, std::size_t ssize)
+        {
+            std::ptrdiff_t size = static_cast<std::ptrdiff_t>(ssize);
+            start = (start >= 0) ? start : start + size;
+            stop = (stop >= 0) ? stop : stop + size;
+
+            if (step > 0)
+            {
+                start = (std::max)(std::ptrdiff_t(0), (std::min)(size, start));
+                stop = (std::max)(std::ptrdiff_t(0), (std::min)(size, stop));
+            }
+            else
+            {
+                start = (std::max)(std::ptrdiff_t(-1), (std::min)(size - 1, start));
+                stop = (std::max)(std::ptrdiff_t(-1), (std::min)(size - 1, stop));
+            }
+
+            return xstepped_range<std::ptrdiff_t>(start, stop, step);
+        }
+
+        A m_start;
+        B m_stop;
+        C m_step;
+    };
+
+    /*******************************
+     * Placeholders and rangemaker *
+     *******************************/
+
+    namespace placeholders
+    {
+        // xtensor universal placeholder
+        // Empty tag type standing for "unspecified": used as a slice bound that
+        // is resolved only when the slice is applied to a concrete shape.
+        struct xtuph
+        {
+        };
+
+        // Accumulates up to three bounds (start, stop, step) fed in through
+        // operator| below. Args records, per filled slot, whether the bound is
+        // a concrete std::ptrdiff_t or the xtuph placeholder.
+        template <class... Args>
+        struct rangemaker
+        {
+            std::ptrdiff_t rng[3];  // = { 0, 0, 0 };
+        };
+
+        // Slot extraction: when the compile-time flag marks the slot as a
+        // placeholder, return xtuph (the stored value is meaningless then)...
+        XTENSOR_CONSTEXPR xtuph get_tuph_or_val(std::ptrdiff_t /*val*/, std::true_type)
+        {
+            return xtuph();
+        }
+
+        // ...otherwise forward the stored concrete value.
+        XTENSOR_CONSTEXPR std::ptrdiff_t get_tuph_or_val(std::ptrdiff_t val, std::false_type)
+        {
+            return val;
+        }
+
+        // Fully specified maker (start|stop|step): convertible to the matching
+        // xrange_adaptor, translating each slot via get_tuph_or_val.
+        template <class A, class B, class C>
+        struct rangemaker<A, B, C>
+        {
+            XTENSOR_CONSTEXPR operator xrange_adaptor<A, B, C>()
+            {
+                return xrange_adaptor<A, B, C>(
+                    {get_tuph_or_val(rng[0], std::is_same<A, xtuph>()),
+                     get_tuph_or_val(rng[1], std::is_same<B, xtuph>()),
+                     get_tuph_or_val(rng[2], std::is_same<C, xtuph>())}
+                );
+            }
+
+            std::ptrdiff_t rng[3];  // = { 0, 0, 0 };
+        };
+
+        // Two-bound maker (start|stop): the step slot stays a placeholder.
+        template <class A, class B>
+        struct rangemaker<A, B>
+        {
+            XTENSOR_CONSTEXPR operator xrange_adaptor<A, B, xt::placeholders::xtuph>()
+            {
+                return xrange_adaptor<A, B, xt::placeholders::xtuph>(
+                    {get_tuph_or_val(rng[0], std::is_same<A, xtuph>()),
+                     get_tuph_or_val(rng[1], std::is_same<B, xtuph>()),
+                     xtuph()}
+                );
+            }
+
+            std::ptrdiff_t rng[3];  // = { 0, 0, 0 };
+        };
+
+        // Appends a concrete bound to the maker (e.g. `_r | 1 | 10`): copies
+        // the three stored values and writes the new one into the next slot.
+        template <class... OA>
+        XTENSOR_CONSTEXPR auto operator|(const rangemaker<OA...>& rng, const std::ptrdiff_t& t)
+        {
+            auto nrng = rangemaker<OA..., std::ptrdiff_t>({rng.rng[0], rng.rng[1], rng.rng[2]});
+            nrng.rng[sizeof...(OA)] = t;
+            return nrng;
+        }
+
+        // Appends a placeholder bound: only the type list grows, the stored
+        // values are carried over untouched.
+        template <class... OA>
+        XTENSOR_CONSTEXPR auto operator|(const rangemaker<OA...>& rng, const xt::placeholders::xtuph& /*t*/)
+        {
+            auto nrng = rangemaker<OA..., xt::placeholders::xtuph>({rng.rng[0], rng.rng[1], rng.rng[2]});
+            return nrng;
+        }
+
+        // Public shorthands: `_` (placeholder), `_r` (range maker seed), and
+        // the all / newaxis / ellipsis tag values.
+        XTENSOR_GLOBAL_CONSTEXPR xtuph _{};
+        XTENSOR_GLOBAL_CONSTEXPR rangemaker<> _r = rangemaker<>({0, 0, 0});
+        XTENSOR_GLOBAL_CONSTEXPR xall_tag _a{};
+        XTENSOR_GLOBAL_CONSTEXPR xnewaxis_tag _n{};
+        XTENSOR_GLOBAL_CONSTEXPR xellipsis_tag _e{};
+    }
+
+    // Returns the universal placeholder value; usable wherever a slice bound
+    // should be left unspecified (function-call spelling of `_`).
+    inline auto xnone()
+    {
+        placeholders::xtuph placeholder{};
+        return placeholder;
+    }
+
+    namespace detail
+    {
+        // Normalizes slice bound types: integral arguments are converted to
+        // std::ptrdiff_t, anything else (e.g. the xtuph placeholder) is kept
+        // as-is. Primary template: identity pass-through.
+        template <class T, class E = void>
+        struct cast_if_integer
+        {
+            using type = T;
+
+            type operator()(T t)
+            {
+                return t;
+            }
+        };
+
+        // Specialization for integral types: cast to the signed
+        // std::ptrdiff_t used throughout the slice machinery.
+        template <class T>
+        struct cast_if_integer<T, std::enable_if_t<xtl::is_integral<T>::value>>
+        {
+            using type = std::ptrdiff_t;
+
+            type operator()(T t)
+            {
+                return static_cast<type>(t);
+            }
+        };
+
+        // Convenience alias for the resulting bound type.
+        template <class T>
+        using cast_if_integer_t = typename cast_if_integer<T>::type;
+    }
+
+    /**
+     * Select a range from start_val to stop_val (excluded).
+     * You can use the shorthand `_` syntax to select from the start or until the end.
+     *
+     * @code{.cpp}
+     * using namespace xt::placeholders;  // to enable _ syntax
+     *
+     * range(3, _)  // select from index 3 to the end
+     * range(_, 5)  // select from index 0 to 5 (excluded)
+     * range(_, _)  // equivalent to `all()`
+     * @endcode
+     *
+     * @param start_val first index of the range (integral value or placeholder)
+     * @param stop_val one-past-last index of the range (integral value or placeholder)
+     * @return an xrange_adaptor whose step is left as a placeholder
+     *
+     * @sa view, strided_view
+     */
+    template <class A, class B>
+    inline auto range(A start_val, B stop_val)
+    {
+        return xrange_adaptor<detail::cast_if_integer_t<A>, detail::cast_if_integer_t<B>, placeholders::xtuph>(
+            detail::cast_if_integer<A>{}(start_val),
+            detail::cast_if_integer<B>{}(stop_val),
+            placeholders::xtuph()
+        );
+    }
+
+    /**
+     * Select a range from start_val to stop_val (excluded) with step
+     * You can use the shorthand `_` syntax to select from the start or until the end.
+     *
+     * @code{.cpp}
+     * using namespace xt::placeholders;  // to enable _ syntax
+     * range(3, _, 5)  // select from index 3 to the end with stepsize 5
+     * @endcode
+     *
+     * @param start_val first index of the range (integral value or placeholder)
+     * @param stop_val one-past-last index of the range (integral value or placeholder)
+     * @param step stride between selected indices (integral value or placeholder)
+     * @return an xrange_adaptor carrying the three (possibly placeholder) bounds
+     *
+     * @sa view, strided_view
+     */
+    template <class A, class B, class C>
+    inline auto range(A start_val, B stop_val, C step)
+    {
+        return xrange_adaptor<detail::cast_if_integer_t<A>, detail::cast_if_integer_t<B>, detail::cast_if_integer_t<C>>(
+            detail::cast_if_integer<A>{}(start_val),
+            detail::cast_if_integer<B>{}(stop_val),
+            detail::cast_if_integer<C>{}(step)
+        );
+    }
+
+    /******************************************************
+     * homogeneous get_size for integral types and slices *
+     ******************************************************/
+
+    // A bare integral index selects exactly one element along its axis.
+    template <class S>
+    inline disable_xslice<S, std::size_t> get_size(const S&) noexcept
+    {
+        return 1;
+    }
+
+    // A slice reports its own number of selected elements.
+    template <class S>
+    inline auto get_size(const xslice<S>& slice) noexcept
+    {
+        return slice.derived_cast().size();
+    }
+
+    /*******************************************************
+     * homogeneous step_size for integral types and slices *
+     *******************************************************/
+
+    // Bare integral indices contribute no stepping: both overloads return 0.
+    template <class S>
+    inline disable_xslice<S, std::size_t> step_size(const S&, std::size_t) noexcept
+    {
+        return 0;
+    }
+
+    template <class S>
+    inline disable_xslice<S, std::size_t> step_size(const S&, std::size_t, std::size_t) noexcept
+    {
+        return 0;
+    }
+
+    // Step from view position idx to the next one, as defined by the slice.
+    template <class S>
+    inline auto step_size(const xslice<S>& slice, std::size_t idx) noexcept
+    {
+        return slice.derived_cast().step_size(idx);
+    }
+
+    // Step from view position idx over n positions, as defined by the slice.
+    template <class S>
+    inline auto step_size(const xslice<S>& slice, std::size_t idx, std::size_t n) noexcept
+    {
+        return slice.derived_cast().step_size(idx, n);
+    }
+
+    /*********************************************
+     * homogeneous value for integral and slices *
+     *********************************************/
+
+    // A bare integral index ignores the position and yields itself.
+    template <class S, class I>
+    inline disable_xslice<S, std::size_t> value(const S& s, I) noexcept
+    {
+        return static_cast<std::size_t>(s);
+    }
+
+    // A slice maps the i-th view position to the underlying index through
+    // its call operator.
+    template <class S, class I>
+    inline auto value(const xslice<S>& slice, I i) noexcept
+    {
+        using ST = typename S::size_type;
+        return slice.derived_cast()(static_cast<ST>(i));
+    }
+
+    /****************************************
+     * homogeneous get_slice_implementation *
+     ****************************************/
+
+    namespace detail
+    {
+        // Turns a user-facing slice specifier into a concrete slice for axis
+        // `index` of expression `e`. Primary template: already-concrete slices
+        // and plain indices pass through, except that negative signed indices
+        // are wrapped by the axis length.
+        template <class T>
+        struct slice_implementation_getter
+        {
+            template <class E, class SL>
+            inline decltype(auto) operator()(E& e, SL&& slice, std::size_t index) const
+            {
+                return get_slice(e, std::forward<SL>(slice), index, xtl::is_signed<std::decay_t<SL>>());
+            }
+
+        private:
+
+            // Unsigned (or non-integral) specifier: forwarded unchanged.
+            template <class E, class SL>
+            inline decltype(auto) get_slice(E&, SL&& slice, std::size_t, std::false_type) const
+            {
+                return std::forward<SL>(slice);
+            }
+
+            // Signed specifier: negative values count from the end of the axis.
+            template <class E, class SL>
+            inline decltype(auto) get_slice(E& e, SL&& slice, std::size_t index, std::true_type) const
+            {
+                using int_type = std::decay_t<SL>;
+                return slice < int_type(0) ? slice + static_cast<std::ptrdiff_t>(e.shape(index))
+                                           : std::ptrdiff_t(slice);
+            }
+        };
+
+        // keep/drop slices must be normalized against the axis length before
+        // use (resolves negative indices, precomputes lookup structures).
+        struct keep_drop_getter
+        {
+            template <class E, class SL>
+            inline decltype(auto) operator()(E& e, SL&& slice, std::size_t index) const
+            {
+                slice.normalize(e.shape()[index]);
+                return std::forward<SL>(slice);
+            }
+
+            // Const specifier: normalize a copy, leaving the original intact.
+            template <class E, class SL>
+            inline auto operator()(E& e, const SL& slice, std::size_t index) const
+            {
+                return this->operator()(e, SL(slice), index);
+            }
+        };
+
+        template <class T>
+        struct slice_implementation_getter<xkeep_slice<T>> : keep_drop_getter
+        {
+        };
+
+        template <class T>
+        struct slice_implementation_getter<xdrop_slice<T>> : keep_drop_getter
+        {
+        };
+
+        // all() tag: materialize an xall spanning the whole axis.
+        template <>
+        struct slice_implementation_getter<xall_tag>
+        {
+            template <class E, class SL>
+            inline auto operator()(E& e, SL&&, std::size_t index) const
+            {
+                return xall<typename E::size_type>(e.shape()[index]);
+            }
+        };
+
+        // newaxis() tag: materialize a size-1 inserted axis.
+        template <>
+        struct slice_implementation_getter<xnewaxis_tag>
+        {
+            template <class E, class SL>
+            inline auto operator()(E&, SL&&, std::size_t) const
+            {
+                return xnewaxis<typename E::size_type>();
+            }
+        };
+
+        // range adaptor: resolve the placeholder bounds against the axis length.
+        template <class A, class B, class C>
+        struct slice_implementation_getter<xrange_adaptor<A, B, C>>
+        {
+            template <class E, class SL>
+            inline auto operator()(E& e, SL&& adaptor, std::size_t index) const
+            {
+                return adaptor.get(e.shape()[index]);
+            }
+        };
+    }
+
+    // Public entry point: dispatch on the decayed specifier type.
+    template <class E, class SL>
+    inline auto get_slice_implementation(E& e, SL&& slice, std::size_t index)
+    {
+        detail::slice_implementation_getter<std::decay_t<SL>> getter;
+        return getter(e, std::forward<SL>(slice), index);
+    }
+
+    /******************************
+     * homogeneous get_slice_type *
+     ******************************/
+
+    namespace detail
+    {
+        // Compile-time counterpart of get_slice_implementation: maps a slice
+        // specifier type to the concrete slice type produced for expression E.
+        template <class E, class SL>
+        struct get_slice_type_impl
+        {
+            using type = SL;
+        };
+
+        template <class E>
+        struct get_slice_type_impl<E, xall_tag>
+        {
+            using type = xall<typename E::size_type>;
+        };
+
+        template <class E>
+        struct get_slice_type_impl<E, xnewaxis_tag>
+        {
+            using type = xnewaxis<typename E::size_type>;
+        };
+
+        // Whatever xrange_adaptor::get returns (xrange or xstepped_range).
+        template <class E, class A, class B, class C>
+        struct get_slice_type_impl<E, xrange_adaptor<A, B, C>>
+        {
+            using type = decltype(xrange_adaptor<A, B, C>(A(), B(), C()).get(0));
+        };
+    }
+
+    template <class E, class SL>
+    using get_slice_type = typename detail::get_slice_type_impl<E, std::remove_reference_t<SL>>::type;
+
+    /*************************
+     * xslice implementation *
+     *************************/
+
+    // CRTP downcast to the concrete slice type (mutable overload).
+    template <class D>
+    inline auto xslice<D>::derived_cast() noexcept -> derived_type&
+    {
+        return *static_cast<derived_type*>(this);
+    }
+
+    // CRTP downcast to the concrete slice type (const overload).
+    template <class D>
+    inline auto xslice<D>::derived_cast() const noexcept -> const derived_type&
+    {
+        return *static_cast<const derived_type*>(this);
+    }
+
+    /*************************
+     * xrange implementation *
+     *************************/
+
+    // Contiguous [start, stop) range with implicit step 1; an empty range is
+    // produced when stop <= start.
+    template <class T>
+    inline xrange<T>::xrange(size_type start_val, size_type stop_val) noexcept
+        : m_start(start_val)
+        , m_size(stop_val > start_val ? stop_val - start_val : 0)
+    {
+    }
+
+    // Converting cast to an xrange over another index type.
+    template <class T>
+    template <class S, typename>
+    inline xrange<T>::operator xrange<S>() const noexcept
+    {
+        xrange<S> ret;
+        ret.m_start = static_cast<S>(m_start);
+        ret.m_size = static_cast<S>(m_size);
+        return ret;
+    }
+
+    template <class T>
+    template <class S, typename>
+    inline xrange<S> xrange<T>::convert() const noexcept
+    {
+        return xrange<S>(*this);
+    }
+
+    // View position i -> underlying index.
+    template <class T>
+    inline auto xrange<T>::operator()(size_type i) const noexcept -> size_type
+    {
+        return m_start + i;
+    }
+
+    template <class T>
+    inline auto xrange<T>::size() const noexcept -> size_type
+    {
+        return m_size;
+    }
+
+    template <class T>
+    inline auto xrange<T>::step_size() const noexcept -> size_type
+    {
+        return 1;
+    }
+
+    // Moving n view positions forward advances the underlying index by n.
+    template <class T>
+    inline auto xrange<T>::step_size(std::size_t /*i*/, std::size_t n) const noexcept -> size_type
+    {
+        return static_cast<size_type>(n);
+    }
+
+    // Underlying index -> view position (inverse of operator()).
+    template <class T>
+    inline auto xrange<T>::revert_index(std::size_t i) const noexcept -> size_type
+    {
+        return i - m_start;
+    }
+
+    template <class T>
+    inline bool xrange<T>::contains(size_type i) const noexcept
+    {
+        return i >= m_start && i < m_start + m_size;
+    }
+
+    template <class T>
+    inline bool xrange<T>::operator==(const self_type& rhs) const noexcept
+    {
+        return (m_start == rhs.m_start) && (m_size == rhs.m_size);
+    }
+
+    template <class T>
+    inline bool xrange<T>::operator!=(const self_type& rhs) const noexcept
+    {
+        return !(*this == rhs);
+    }
+
+    /*********************************
+     * xstepped_range implementation *
+     *********************************/
+
+    // [start, stop) range with arbitrary (possibly negative) step. The size is
+    // the truncating quotient n/step plus one when there is a remainder and n
+    // and step point in the same direction (i.e. a ceiling division toward the
+    // stepping direction); otherwise the range is empty or exact.
+    template <class T>
+    inline xstepped_range<T>::xstepped_range(size_type start_val, size_type stop_val, size_type step) noexcept
+        : m_start(start_val)
+        , m_size(size_type(0))
+        , m_step(step)
+    {
+        size_type n = stop_val - start_val;
+        m_size = n / step + (((n < 0) ^ (step > 0)) && (n % step));
+    }
+
+    // Converting cast to an xstepped_range over another index type.
+    template <class T>
+    template <class S, typename>
+    inline xstepped_range<T>::operator xstepped_range<S>() const noexcept
+    {
+        xstepped_range<S> ret;
+        ret.m_start = static_cast<S>(m_start);
+        ret.m_size = static_cast<S>(m_size);
+        ret.m_step = static_cast<S>(m_step);
+        return ret;
+    }
+
+    template <class T>
+    template <class S, typename>
+    inline xstepped_range<S> xstepped_range<T>::convert() const noexcept
+    {
+        return xstepped_range<S>(*this);
+    }
+
+    // View position i -> underlying index.
+    template <class T>
+    inline auto xstepped_range<T>::operator()(size_type i) const noexcept -> size_type
+    {
+        return m_start + i * m_step;
+    }
+
+    template <class T>
+    inline auto xstepped_range<T>::size() const noexcept -> size_type
+    {
+        return m_size;
+    }
+
+    template <class T>
+    inline auto xstepped_range<T>::step_size() const noexcept -> size_type
+    {
+        return m_step;
+    }
+
+    // Moving n view positions forward advances the underlying index by n*step.
+    template <class T>
+    inline auto xstepped_range<T>::step_size(std::size_t /*i*/, std::size_t n) const noexcept -> size_type
+    {
+        return m_step * static_cast<size_type>(n);
+    }
+
+    // Underlying index -> view position (inverse of operator()).
+    template <class T>
+    inline auto xstepped_range<T>::revert_index(std::size_t i) const noexcept -> size_type
+    {
+        return (i - m_start) / m_step;
+    }
+
+    // An index is contained when it lies in range and on the step grid.
+    template <class T>
+    inline bool xstepped_range<T>::contains(size_type i) const noexcept
+    {
+        return i >= m_start && i < m_start + m_size * m_step && ((i - m_start) % m_step == 0);
+    }
+
+    template <class T>
+    inline bool xstepped_range<T>::operator==(const self_type& rhs) const noexcept
+    {
+        return (m_start == rhs.m_start) && (m_size == rhs.m_size) && (m_step == rhs.m_step);
+    }
+
+    template <class T>
+    inline bool xstepped_range<T>::operator!=(const self_type& rhs) const noexcept
+    {
+        return !(*this == rhs);
+    }
+
+    /***********************
+     * xall implementation *
+     ***********************/
+
+    // Selects every index of an axis of the given length; an identity slice.
+    template <class T>
+    inline xall<T>::xall(size_type size) noexcept
+        : m_size(size)
+    {
+    }
+
+    template <class T>
+    template <class S, typename>
+    inline xall<T>::operator xall<S>() const noexcept
+    {
+        return xall<S>(static_cast<S>(m_size));
+    }
+
+    template <class T>
+    template <class S, typename>
+    inline xall<S> xall<T>::convert() const noexcept
+    {
+        return xall<S>(*this);
+    }
+
+    // View position i maps to itself.
+    template <class T>
+    inline auto xall<T>::operator()(size_type i) const noexcept -> size_type
+    {
+        return i;
+    }
+
+    template <class T>
+    inline auto xall<T>::size() const noexcept -> size_type
+    {
+        return m_size;
+    }
+
+    template <class T>
+    inline auto xall<T>::step_size() const noexcept -> size_type
+    {
+        return 1;
+    }
+
+    template <class T>
+    inline auto xall<T>::step_size(std::size_t /*i*/, std::size_t n) const noexcept -> size_type
+    {
+        return static_cast<size_type>(n);
+    }
+
+    // Identity mapping in both directions.
+    template <class T>
+    inline auto xall<T>::revert_index(std::size_t i) const noexcept -> size_type
+    {
+        return i;
+    }
+
+    template <class T>
+    inline bool xall<T>::contains(size_type i) const noexcept
+    {
+        return i < m_size;
+    }
+
+    // Equality is determined by the axis length alone.
+    template <class T>
+    inline bool xall<T>::operator==(const self_type& rhs) const noexcept
+    {
+        return m_size == rhs.m_size;
+    }
+
+    template <class T>
+    inline bool xall<T>::operator!=(const self_type& rhs) const noexcept
+    {
+        return !(*this == rhs);
+    }
+
+    /***************************
+     * xnewaxis implementation *
+     ***************************/
+
+    // Stateless slice inserting a synthetic axis of length 1; every instance
+    // is interchangeable with every other.
+    template <class T>
+    template <class S, typename>
+    inline xnewaxis<T>::operator xnewaxis<S>() const noexcept
+    {
+        return xnewaxis<S>();
+    }
+
+    template <class T>
+    template <class S, typename>
+    inline xnewaxis<S> xnewaxis<T>::convert() const noexcept
+    {
+        return xnewaxis<S>(*this);
+    }
+
+    // The inserted axis always maps to underlying index 0.
+    template <class T>
+    inline auto xnewaxis<T>::operator()(size_type) const noexcept -> size_type
+    {
+        return 0;
+    }
+
+    template <class T>
+    inline auto xnewaxis<T>::size() const noexcept -> size_type
+    {
+        return 1;
+    }
+
+    // Stepping along the synthetic axis never moves the underlying index.
+    template <class T>
+    inline auto xnewaxis<T>::step_size() const noexcept -> size_type
+    {
+        return 0;
+    }
+
+    template <class T>
+    inline auto xnewaxis<T>::step_size(std::size_t /*i*/, std::size_t /*n*/) const noexcept -> size_type
+    {
+        return 0;
+    }
+
+    template <class T>
+    inline auto xnewaxis<T>::revert_index(std::size_t i) const noexcept -> size_type
+    {
+        return i;
+    }
+
+    // Only index 0 exists on a length-1 axis.
+    template <class T>
+    inline bool xnewaxis<T>::contains(size_type i) const noexcept
+    {
+        return i == 0;
+    }
+
+    // All xnewaxis instances are equal (no state to compare).
+    template <class T>
+    inline bool xnewaxis<T>::operator==(const self_type& /*rhs*/) const noexcept
+    {
+        return true;
+    }
+
+    // Inequality must be the negation of operator==, which returns true
+    // unconditionally for the stateless xnewaxis: hence always false.
+    // (The previous `return true;` contradicted operator== and made
+    // a != a hold for every xnewaxis.)
+    template <class T>
+    inline bool xnewaxis<T>::operator!=(const self_type& /*rhs*/) const noexcept
+    {
+        return false;
+    }
+
+    /******************************
+     * xkeep_slice implementation *
+     ******************************/
+
+    // Slice keeping an explicit list of indices. m_raw_indices stores the
+    // user-provided (possibly negative) indices; m_indices holds the resolved
+    // non-negative indices after normalize().
+    template <class T>
+    template <class C, typename>
+    inline xkeep_slice<T>::xkeep_slice(C& cont)
+        : m_raw_indices(cont.begin(), cont.end())
+    {
+    }
+
+    template <class T>
+    inline xkeep_slice<T>::xkeep_slice(container_type&& cont)
+        : m_raw_indices(std::move(cont))
+    {
+    }
+
+    template <class T>
+    template <class S>
+    inline xkeep_slice<T>::xkeep_slice(std::initializer_list<S> t)
+        : m_raw_indices(t.size())
+    {
+        std::transform(
+            t.begin(),
+            t.end(),
+            m_raw_indices.begin(),
+            [](auto t)
+            {
+                return static_cast<size_type>(t);
+            }
+        );
+    }
+
+    // Converting cast: element-wise cast of both index containers.
+    template <class T>
+    template <class S, typename>
+    inline xkeep_slice<T>::operator xkeep_slice<S>() const noexcept
+    {
+        xkeep_slice<S> ret;
+        using us_type = typename container_type::size_type;
+        us_type sz = static_cast<us_type>(size());
+        ret.m_raw_indices.resize(sz);
+        ret.m_indices.resize(sz);
+        std::transform(
+            m_raw_indices.cbegin(),
+            m_raw_indices.cend(),
+            ret.m_raw_indices.begin(),
+            [](const T& val)
+            {
+                return static_cast<S>(val);
+            }
+        );
+        std::transform(
+            m_indices.cbegin(),
+            m_indices.cend(),
+            ret.m_indices.begin(),
+            [](const T& val)
+            {
+                return static_cast<S>(val);
+            }
+        );
+        return ret;
+    }
+
+    template <class T>
+    template <class S, typename>
+    inline xkeep_slice<S> xkeep_slice<T>::convert() const noexcept
+    {
+        return xkeep_slice<S>(*this);
+    }
+
+    // Resolve negative raw indices against the axis length. Must be called
+    // before operator(), revert_index or contains are used.
+    template <class T>
+    inline void xkeep_slice<T>::normalize(std::size_t shape)
+    {
+        m_indices.resize(m_raw_indices.size());
+        std::size_t sz = m_indices.size();
+        for (std::size_t i = 0; i < sz; ++i)
+        {
+            m_indices[i] = m_raw_indices[i] < 0 ? static_cast<size_type>(shape) + m_raw_indices[i]
+                                                : m_raw_indices[i];
+        }
+    }
+
+    // View position i -> kept underlying index. The single-index case is
+    // special-cased so any i returns the one stored index.
+    template <class T>
+    inline auto xkeep_slice<T>::operator()(size_type i) const noexcept -> size_type
+    {
+        return m_indices.size() == size_type(1) ? m_indices.front() : m_indices[static_cast<std::size_t>(i)];
+    }
+
+    template <class T>
+    inline auto xkeep_slice<T>::size() const noexcept -> size_type
+    {
+        return static_cast<size_type>(m_raw_indices.size());
+    }
+
+    // Distance in underlying indices from view position i over n positions;
+    // clamped to one past the last kept index when i + n runs off the end.
+    template <class T>
+    inline auto xkeep_slice<T>::step_size(std::size_t i, std::size_t n) const noexcept -> size_type
+    {
+        if (m_indices.size() == 1)
+        {
+            return 0;
+        }
+        if (i + n >= m_indices.size())
+        {
+            return m_indices.back() - m_indices[i] + 1;
+        }
+        else
+        {
+            return m_indices[i + n] - m_indices[i];
+        }
+    }
+
+    // Underlying index -> view position; throws when i is not kept.
+    template <class T>
+    inline auto xkeep_slice<T>::revert_index(std::size_t i) const -> size_type
+    {
+        auto it = std::find(m_indices.begin(), m_indices.end(), i);
+        if (it != m_indices.end())
+        {
+            return std::distance(m_indices.begin(), it);
+        }
+        else
+        {
+            XTENSOR_THROW(std::runtime_error, "Index i (" + std::to_string(i) + ") not in indices of islice.");
+        }
+    }
+
+    template <class T>
+    inline bool xkeep_slice<T>::contains(size_type i) const noexcept
+    {
+        return (std::find(m_indices.begin(), m_indices.end(), i) == m_indices.end()) ? false : true;
+    }
+
+    // Equality compares the normalized indices only.
+    template <class T>
+    inline bool xkeep_slice<T>::operator==(const self_type& rhs) const noexcept
+    {
+        return m_indices == rhs.m_indices;
+    }
+
+    template <class T>
+    inline bool xkeep_slice<T>::operator!=(const self_type& rhs) const noexcept
+    {
+        return !(*this == rhs);
+    }
+
+    /******************************
+     * xdrop_slice implementation *
+     ******************************/
+
+    // Slice removing an explicit list of indices. m_raw_indices stores the
+    // user-provided (possibly negative) indices, m_indices the resolved ones,
+    // and m_inc maps view positions to the cumulative count of dropped
+    // elements to add (built by normalize()).
+    template <class T>
+    template <class C, typename>
+    inline xdrop_slice<T>::xdrop_slice(C& cont)
+        : m_raw_indices(cont.begin(), cont.end())
+    {
+    }
+
+    template <class T>
+    inline xdrop_slice<T>::xdrop_slice(container_type&& cont)
+        : m_raw_indices(std::move(cont))
+    {
+    }
+
+    template <class T>
+    template <class S>
+    inline xdrop_slice<T>::xdrop_slice(std::initializer_list<S> t)
+        : m_raw_indices(t.size())
+    {
+        std::transform(
+            t.begin(),
+            t.end(),
+            m_raw_indices.begin(),
+            [](auto t)
+            {
+                return static_cast<size_type>(t);
+            }
+        );
+    }
+
+    // Converting cast: element-wise cast of all internal containers.
+    template <class T>
+    template <class S, typename>
+    inline xdrop_slice<T>::operator xdrop_slice<S>() const noexcept
+    {
+        xdrop_slice<S> ret;
+        ret.m_raw_indices.resize(m_raw_indices.size());
+        ret.m_indices.resize(m_indices.size());
+        std::transform(
+            m_raw_indices.cbegin(),
+            m_raw_indices.cend(),
+            ret.m_raw_indices.begin(),
+            [](const T& val)
+            {
+                return static_cast<S>(val);
+            }
+        );
+        std::transform(
+            m_indices.cbegin(),
+            m_indices.cend(),
+            ret.m_indices.begin(),
+            [](const T& val)
+            {
+                return static_cast<S>(val);
+            }
+        );
+        std::transform(
+            m_inc.cbegin(),
+            m_inc.cend(),
+            std::inserter(ret.m_inc, ret.m_inc.begin()),
+            [](const auto& val)
+            {
+                return std::make_pair(static_cast<S>(val.first), static_cast<S>(val.second));
+            }
+        );
+        ret.m_size = static_cast<S>(m_size);
+        return ret;
+    }
+
+    template <class T>
+    template <class S, typename>
+    inline xdrop_slice<S> xdrop_slice<T>::convert() const noexcept
+    {
+        return xdrop_slice<S>(*this);
+    }
+
+    // Resolve negative indices against the axis length and precompute m_inc:
+    // consecutive dropped indices are grouped into runs, and for the view
+    // position where each run starts we record the cumulative number of
+    // dropped elements to skip. Must be called before the accessors below.
+    template <class T>
+    inline void xdrop_slice<T>::normalize(std::size_t shape)
+    {
+        m_size = static_cast<size_type>(shape - m_raw_indices.size());
+
+        m_indices.resize(m_raw_indices.size());
+        std::size_t sz = m_indices.size();
+        for (std::size_t i = 0; i < sz; ++i)
+        {
+            m_indices[i] = m_raw_indices[i] < 0 ? static_cast<size_type>(shape) + m_raw_indices[i]
+                                                : m_raw_indices[i];
+        }
+        size_type cum = size_type(0);
+        size_type prev_cum = cum;
+        for (std::size_t i = 0; i < sz; ++i)
+        {
+            std::size_t ind = i;
+            size_type d = m_indices[i];
+            // Extend the run over consecutive dropped indices.
+            while (i + 1 < sz && m_indices[i + 1] == m_indices[i] + 1)
+            {
+                ++i;
+            }
+            cum += (static_cast<size_type>(i) - static_cast<size_type>(ind)) + 1;
+            m_inc[d - prev_cum] = cum;
+            prev_cum = cum;
+        }
+    }
+
+    // View position i -> underlying index: add the cumulative number of
+    // dropped elements recorded at or before position i.
+    template <class T>
+    inline auto xdrop_slice<T>::operator()(size_type i) const noexcept -> size_type
+    {
+        if (m_inc.empty() || i < m_inc.begin()->first)
+        {
+            return i;
+        }
+        else
+        {
+            auto iter = --m_inc.upper_bound(i);
+            return i + iter->second;
+        }
+    }
+
+    template <class T>
+    inline auto xdrop_slice<T>::size() const noexcept -> size_type
+    {
+        return m_size;
+    }
+
+    // Distance in underlying indices from view position i over n positions;
+    // clamped to one past the last selected index when i + n runs off the end.
+    template <class T>
+    inline auto xdrop_slice<T>::step_size(std::size_t i, std::size_t n) const noexcept -> size_type
+    {
+        if (i + n >= static_cast<std::size_t>(m_size))
+        {
+            return (*this)(static_cast<size_type>(m_size - 1)) - (*this)(static_cast<size_type>(i)) + 1;
+        }
+        else
+        {
+            return (*this)(static_cast<size_type>(i + n)) - (*this)(static_cast<size_type>(i));
+        }
+    }
+
+    // Underlying index -> view position: subtract the cumulative number of
+    // dropped elements below i. Guard against an empty increment map (nothing
+    // dropped, so normalize() built no runs): dereferencing m_inc.begin()
+    // would otherwise be undefined behavior; operator() already performs the
+    // same emptiness check, and an empty map means the mapping is identity.
+    template <class T>
+    inline auto xdrop_slice<T>::revert_index(std::size_t i) const -> size_type
+    {
+        if (m_inc.empty() || i < m_inc.begin()->first)
+        {
+            return i;
+        }
+        else
+        {
+            auto iter = --m_inc.lower_bound(i);
+            // lower_bound may land on the entry covering i itself; back up one
+            // run when i falls before the end of that run's skipped span.
+            auto check = iter->first + iter->second;
+            if (check > i)
+            {
+                --iter;
+            }
+            return i - iter->second;
+        }
+    }
+
+    // An index is selected by the view exactly when it was NOT dropped.
+    template <class T>
+    inline bool xdrop_slice<T>::contains(size_type i) const noexcept
+    {
+        return (std::find(m_indices.begin(), m_indices.end(), i) == m_indices.end()) ? true : false;
+    }
+
+    // Equality compares the normalized dropped indices only.
+    template <class T>
+    inline bool xdrop_slice<T>::operator==(const self_type& rhs) const noexcept
+    {
+        return m_indices == rhs.m_indices;
+    }
+
+    template <class T>
+    inline bool xdrop_slice<T>::operator!=(const self_type& rhs) const noexcept
+    {
+        return !(*this == rhs);
+    }
+}
+
+#undef XTENSOR_CONSTEXPR
+
+#endif

+ 1353 - 0
3rd/numpy/include/xtensor/xsort.hpp

@@ -0,0 +1,1353 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_SORT_HPP
+#define XTENSOR_SORT_HPP
+
+#include <algorithm>
+#include <cmath>
+#include <iterator>
+#include <utility>
+
+#include <xtl/xcompare.hpp>
+
+#include "xadapt.hpp"
+#include "xarray.hpp"
+#include "xeval.hpp"
+#include "xindex_view.hpp"
+#include "xmanipulation.hpp"
+#include "xmath.hpp"
+#include "xslice.hpp"  // for xnone
+#include "xtensor.hpp"
+#include "xtensor_config.hpp"
+#include "xtensor_forward.hpp"
+#include "xview.hpp"
+
+namespace xt
+{
+    /**
+     * @defgroup xt_xsort Sorting functions.
+     *
+     * Because sorting functions need to access the tensor data repeatedly, they evaluate their
+     * input and may allocate temporaries.
+     */
+
+    namespace detail
+    {
+        template <class T>
+        std::ptrdiff_t adjust_secondary_stride(std::ptrdiff_t stride, T shape)
+        {
+            return stride != 0 ? stride : static_cast<std::ptrdiff_t>(shape);
+        }
+
+        template <class E>
+        inline std::ptrdiff_t get_secondary_stride(const E& ev)
+        {
+            if (ev.layout() == layout_type::row_major)
+            {
+                return adjust_secondary_stride(ev.strides()[ev.dimension() - 2], *(ev.shape().end() - 1));
+            }
+
+            return adjust_secondary_stride(ev.strides()[1], *(ev.shape().begin()));
+        }
+
+        template <class E>
+        inline std::size_t leading_axis_n_iters(const E& ev)
+        {
+            if (ev.layout() == layout_type::row_major)
+            {
+                return std::accumulate(
+                    ev.shape().begin(),
+                    ev.shape().end() - 1,
+                    std::size_t(1),
+                    std::multiplies<>()
+                );
+            }
+            return std::accumulate(ev.shape().begin() + 1, ev.shape().end(), std::size_t(1), std::multiplies<>());
+        }
+
        // Invokes fct(begin, end) on every 1-D lane of `ev` along its leading
        // axis.  Lanes are evenly spaced raw-pointer ranges of length
        // `secondary_stride`, so together they tile the whole data buffer.
        // Requires at least 2 dimensions (asserted).
        template <class E, class F>
        inline void call_over_leading_axis(E& ev, F&& fct)
        {
            XTENSOR_ASSERT(ev.dimension() >= 2);

            const std::size_t n_iters = leading_axis_n_iters(ev);
            const std::ptrdiff_t secondary_stride = get_secondary_stride(ev);

            const auto begin = ev.data();
            const auto end = begin + n_iters * secondary_stride;
            // Each lane is [iter, iter + secondary_stride).
            for (auto iter = begin; iter != end; iter += secondary_stride)
            {
                fct(iter, iter + secondary_stride);
            }
        }
+
        // Invokes fct(begin1, end1, begin2, end2) on matching 1-D lanes of
        // `e1` and `e2` along their leading axis.  Both expressions must have
        // the same dimension and the same lane stride (asserted), so the two
        // iterations stay in lock step.
        template <class E1, class E2, class F>
        inline void call_over_leading_axis(E1& e1, E2& e2, F&& fct)
        {
            XTENSOR_ASSERT(e1.dimension() >= 2);
            XTENSOR_ASSERT(e1.dimension() == e2.dimension());

            const std::size_t n_iters = leading_axis_n_iters(e1);
            const std::ptrdiff_t secondary_stride1 = get_secondary_stride(e1);
            const std::ptrdiff_t secondary_stride2 = get_secondary_stride(e2);
            XTENSOR_ASSERT(secondary_stride1 == secondary_stride2);

            const auto begin1 = e1.data();
            const auto end1 = begin1 + n_iters * secondary_stride1;
            const auto begin2 = e2.data();
            const auto end2 = begin2 + n_iters * secondary_stride2;
            auto iter1 = begin1;
            auto iter2 = begin2;
            // Advance both lane pointers together; the dual end-check is a
            // safety net should the strides ever disagree in release builds.
            for (; (iter1 != end1) && (iter2 != end2); iter1 += secondary_stride1, iter2 += secondary_stride2)
            {
                fct(iter1, iter1 + secondary_stride1, iter2, iter2 + secondary_stride2);
            }
        }
+
+        template <class E>
+        inline std::size_t leading_axis(const E& e)
+        {
+            if (e.layout() == layout_type::row_major)
+            {
+                return e.dimension() - 1;
+            }
+            else if (e.layout() == layout_type::column_major)
+            {
+                return 0;
+            }
+            XTENSOR_THROW(std::runtime_error, "Layout not supported.");
+        }
+
+        // get permutations to transpose and reverse-transpose array
+        inline std::pair<dynamic_shape<std::size_t>, dynamic_shape<std::size_t>>
+        get_permutations(std::size_t dim, std::size_t ax, layout_type layout)
+        {
+            dynamic_shape<std::size_t> permutation(dim);
+            std::iota(permutation.begin(), permutation.end(), std::size_t(0));
+            permutation.erase(permutation.begin() + std::ptrdiff_t(ax));
+
+            if (layout == layout_type::row_major)
+            {
+                permutation.push_back(ax);
+            }
+            else
+            {
+                permutation.insert(permutation.begin(), ax);
+            }
+
+            // TODO find a more clever way to get reverse permutation?
+            dynamic_shape<std::size_t> reverse_permutation;
+            for (std::size_t i = 0; i < dim; ++i)
+            {
+                auto it = std::find(permutation.begin(), permutation.end(), i);
+                reverse_permutation.push_back(std::size_t(std::distance(permutation.begin(), it)));
+            }
+
+            return std::make_pair(std::move(permutation), std::move(reverse_permutation));
+        }
+
        // Evaluates `e` into a container of type R and applies
        // lambda(begin, end) to every 1-D lane along `axis`:
        // - 1-D input: apply once over the whole copied range;
        // - `axis` already leading: apply in place over the copy;
        // - otherwise: transpose so `axis` becomes leading, apply, and
        //   transpose back.
        // Returns the processed copy; `e` itself is left untouched.
        template <class R, class E, class F>
        inline R map_axis(const E& e, std::ptrdiff_t axis, F&& lambda)
        {
            if (e.dimension() == 1)
            {
                R res = e;
                lambda(res.begin(), res.end());
                return res;
            }

            const std::size_t ax = normalize_axis(e.dimension(), axis);
            if (ax == detail::leading_axis(e))
            {
                R res = e;
                detail::call_over_leading_axis(res, std::forward<F>(lambda));
                return res;
            }

            // Permute `ax` into leading position, process, then undo the
            // permutation so the result has the original axis order.
            dynamic_shape<std::size_t> permutation, reverse_permutation;
            std::tie(permutation, reverse_permutation) = get_permutations(e.dimension(), ax, e.layout());
            R res = transpose(e, permutation);
            detail::call_over_leading_axis(res, std::forward<F>(lambda));
            res = transpose(res, reverse_permutation);
            return res;
        }
+
        // Metafunction computing the container type holding a flattened sort
        // result.  Generic case: the type is kept unchanged.
        template <class VT>
        struct flatten_sort_result_type_impl
        {
            using type = VT;
        };

        // Static-rank tensors flatten to rank 1.
        template <class VT, std::size_t N, layout_type L>
        struct flatten_sort_result_type_impl<xtensor<VT, N, L>>
        {
            using type = xtensor<VT, 1, L>;
        };

        // Fixed-shape tensors flatten to a fixed 1-D shape of the same
        // total size.
        template <class VT, class S, layout_type L>
        struct flatten_sort_result_type_impl<xtensor_fixed<VT, S, L>>
        {
            using type = xtensor_fixed<VT, xshape<fixed_compute_size<S>::value>, L>;
        };

        // Entry point: normalizes the expression to its common tensor type
        // before dispatching to the implementation above.
        template <class VT>
        struct flatten_sort_result_type : flatten_sort_result_type_impl<common_tensor_type_t<VT>>
        {
        };

        template <class VT>
        using flatten_sort_result_type_t = typename flatten_sort_result_type<VT>::type;
+
+        template <class E, class R = flatten_sort_result_type_t<E>>
+        inline auto flat_sort_impl(const xexpression<E>& e)
+        {
+            const auto& de = e.derived_cast();
+            R ev;
+            ev.resize({static_cast<typename R::shape_type::value_type>(de.size())});
+
+            std::copy(de.cbegin(), de.cend(), ev.begin());
+            std::sort(ev.begin(), ev.end());
+
+            return ev;
+        }
+    }
+
    /**
     * Sort the flattened xexpression.
     *
     * @ingroup xt_xsort
     * @param e xexpression to sort
     * @return sorted 1-D container (copy); `e` is left untouched
     */
    template <class E>
    inline auto sort(const xexpression<E>& e, placeholders::xtuph /*t*/)
    {
        return detail::flat_sort_impl(e);
    }
+
    namespace detail
    {
        // Container type used to hold an evaluated copy for sorting:
        // by default, the expression's own temporary type.
        template <class T>
        struct sort_eval_type
        {
            using type = typename T::temporary_type;
        };

        // Fixed-shape tensors decay to a resizable xtensor of the same rank,
        // since axis-wise sorting may require a transposed intermediate.
        template <class T, std::size_t... I, layout_type L>
        struct sort_eval_type<xtensor_fixed<T, fixed_shape<I...>, L>>
        {
            using type = xtensor<T, sizeof...(I), L>;
        };
    }
+
+    /**
+     * Sort xexpression (optionally along axis)
+     * The sort is performed using the ``std::sort`` functions.
+     * A copy of the xexpression is created and returned.
+     *
+     * @ingroup xt_xsort
+     * @param e xexpression to sort
+     * @param axis axis along which sort is performed
+     *
+     * @return sorted array (copy)
+     */
+    template <class E>
+    inline auto sort(const xexpression<E>& e, std::ptrdiff_t axis = -1)
+    {
+        using eval_type = typename detail::sort_eval_type<E>::type;
+
+        return detail::map_axis<eval_type>(
+            e.derived_cast(),
+            axis,
+            [](auto begin, auto end)
+            {
+                std::sort(begin, end);
+            }
+        );
+    }
+
+    /*****************************
+     * Implementation of argsort *
+     *****************************/
+
    /**
     * Sorting method.
     * Predefined methods for performing indirect sorting.
     * Selects between ``std::sort`` and ``std::stable_sort`` in argsort.
     * @see argsort(const xexpression<E>&, std::ptrdiff_t, sorting_method)
     */
    enum class sorting_method
    {
        /**
         *  Faster method but with no guarantee on preservation of order of equal elements
         *  https://en.cppreference.com/w/cpp/algorithm/sort.
         */
        quick,
        /**
         *  Slower method but with guarantee on preservation of order of equal elements
         *  https://en.cppreference.com/w/cpp/algorithm/stable_sort.
         */
        stable,
    };
+
+    namespace detail
+    {
+        template <class ConstRandomIt, class RandomIt, class Compare, class Method>
+        inline void argsort_iter(
+            ConstRandomIt data_begin,
+            ConstRandomIt data_end,
+            RandomIt idx_begin,
+            RandomIt idx_end,
+            Compare comp,
+            Method method
+        )
+        {
+            XTENSOR_ASSERT(std::distance(data_begin, data_end) >= 0);
+            XTENSOR_ASSERT(std::distance(idx_begin, idx_end) == std::distance(data_begin, data_end));
+            (void) idx_end;  // TODO(C++17) [[maybe_unused]] only used in assertion.
+
+            std::iota(idx_begin, idx_end, 0);
+            switch (method)
+            {
+                case (sorting_method::quick):
+                {
+                    std::sort(
+                        idx_begin,
+                        idx_end,
+                        [&](const auto i, const auto j)
+                        {
+                            return comp(*(data_begin + i), *(data_begin + j));
+                        }
+                    );
+                }
+                case (sorting_method::stable):
+                {
+                    std::stable_sort(
+                        idx_begin,
+                        idx_end,
+                        [&](const auto i, const auto j)
+                        {
+                            return comp(*(data_begin + i), *(data_begin + j));
+                        }
+                    );
+                }
+            }
+        }
+
+        template <class ConstRandomIt, class RandomIt, class Method>
+        inline void
+        argsort_iter(ConstRandomIt data_begin, ConstRandomIt data_end, RandomIt idx_begin, RandomIt idx_end, Method method)
+        {
+            return argsort_iter(
+                std::move(data_begin),
+                std::move(data_end),
+                std::move(idx_begin),
+                std::move(idx_end),
+                [](const auto& x, const auto& y) -> bool
+                {
+                    return x < y;
+                },
+                method
+            );
+        }
+
        // Metafunction rebinding a container type T to hold values of type
        // VT while preserving its rank/shape/layout.  Generic fallback:
        // a dynamic-layout xarray.
        template <class VT, class T>
        struct rebind_value_type
        {
            using type = xarray<VT, xt::layout_type::dynamic>;
        };

        template <class VT, class EC, layout_type L>
        struct rebind_value_type<VT, xarray<EC, L>>
        {
            using type = xarray<VT, L>;
        };

        template <class VT, class EC, std::size_t N, layout_type L>
        struct rebind_value_type<VT, xtensor<EC, N, L>>
        {
            using type = xtensor<VT, N, L>;
        };

        template <class VT, class ET, class S, layout_type L>
        struct rebind_value_type<VT, xtensor_fixed<ET, S, L>>
        {
            using type = xtensor_fixed<VT, S, L>;
        };

        // Like rebind_value_type, but additionally flattens static-rank and
        // fixed-shape tensors to one dimension (for flat argsort results).
        template <class VT, class T>
        struct flatten_rebind_value_type
        {
            using type = typename rebind_value_type<VT, T>::type;
        };

        template <class VT, class EC, std::size_t N, layout_type L>
        struct flatten_rebind_value_type<VT, xtensor<EC, N, L>>
        {
            using type = xtensor<VT, 1, L>;
        };

        template <class VT, class ET, class S, layout_type L>
        struct flatten_rebind_value_type<VT, xtensor_fixed<ET, S, L>>
        {
            using type = xtensor_fixed<VT, xshape<fixed_compute_size<S>::value>, L>;
        };

        // Index container for axis-wise argsort: same shape family as T,
        // but holding T's size_type.
        template <class T>
        struct argsort_result_type
        {
            using type = typename rebind_value_type<typename T::temporary_type::size_type, typename T::temporary_type>::type;
        };

        // Index container for flat argsort: 1-D variant of the above.
        template <class T>
        struct linear_argsort_result_type
        {
            using type = typename flatten_rebind_value_type<
                typename T::temporary_type::size_type,
                typename T::temporary_type>::type;
        };
+
+        template <class E, class R = typename detail::linear_argsort_result_type<E>::type, class Method>
+        inline auto flatten_argsort_impl(const xexpression<E>& e, Method method)
+        {
+            const auto& de = e.derived_cast();
+
+            auto cit = de.template begin<layout_type::row_major>();
+            using const_iterator = decltype(cit);
+            auto ad = xiterator_adaptor<const_iterator, const_iterator>(cit, cit, de.size());
+
+            using result_type = R;
+            result_type result;
+            result.resize({de.size()});
+
+            detail::argsort_iter(de.cbegin(), de.cend(), result.begin(), result.end(), method);
+
+            return result;
+        }
+    }
+
    /**
     * Argsort over the flattened expression.
     *
     * @ingroup xt_xsort
     * @param e xexpression to argsort
     * @param method sorting algorithm to use
     * @return 1-D container of indices into the flattened expression
     */
    template <class E>
    inline auto
    argsort(const xexpression<E>& e, placeholders::xtuph /*t*/, sorting_method method = sorting_method::quick)
    {
        return detail::flatten_argsort_impl(e, method);
    }
+
    /**
     * Argsort xexpression (optionally along axis)
     * Performs an indirect sort along the given axis. Returns an xarray
     * of indices of the same shape as e that index data along the given axis in
     * sorted order.
     *
     * @ingroup xt_xsort
     * @param e xexpression to argsort
     * @param axis axis along which argsort is performed
     * @param method sorting algorithm to use
     *
     * @return argsorted index array
     *
     * @see xt::sorting_method
     */
    template <class E>
    inline auto
    argsort(const xexpression<E>& e, std::ptrdiff_t axis = -1, sorting_method method = sorting_method::quick)
    {
        using eval_type = typename detail::sort_eval_type<E>::type;
        using result_type = typename detail::argsort_result_type<eval_type>::type;

        const auto& de = e.derived_cast();

        std::size_t ax = normalize_axis(de.dimension(), axis);

        // 1-D input degenerates to the flat case.
        if (de.dimension() == 1)
        {
            return detail::flatten_argsort_impl<E, result_type>(e, method);
        }

        // Lane-wise kernel: argsort the data lane [ev_begin, ev_end) into
        // the matching index lane [res_begin, res_end).
        const auto argsort = [&method](auto res_begin, auto res_end, auto ev_begin, auto ev_end)
        {
            detail::argsort_iter(ev_begin, ev_end, res_begin, res_end, method);
        };

        // Fast path: the requested axis is already contiguous.
        if (ax == detail::leading_axis(de))
        {
            result_type res = result_type::from_shape(de.shape());
            detail::call_over_leading_axis(res, de, argsort);
            return res;
        }

        // Otherwise transpose `ax` into leading position, argsort lane by
        // lane, and transpose the index array back.
        dynamic_shape<std::size_t> permutation, reverse_permutation;
        std::tie(permutation, reverse_permutation) = detail::get_permutations(de.dimension(), ax, de.layout());
        eval_type ev = transpose(de, permutation);
        result_type res = result_type::from_shape(ev.shape());
        detail::call_over_leading_axis(res, ev, argsort);
        res = transpose(res, reverse_permutation);
        return res;
    }
+
+    /************************************************
+     * Implementation of partition and argpartition *
+     ************************************************/
+
+    namespace detail
+    {
        /**
         * Partition a given random iterator.
         *
         * Runs ``std::nth_element`` once per requested index, from the
         * largest to the smallest, shrinking the working range each time.
         *
         * @param data_begin Start of the data to partition.
         * @param data_end Past end of the data to partition.
         * @param kth_begin Start of the indices to partition.
         *        Indices must be sorted in decreasing order.
         * @param kth_end Past end of the indices to partition.
         *        Indices must be sorted in decreasing order.
         * @param comp Comparison function for `x < y`.
         */
        template <class RandomIt, class Iter, class Compare>
        inline void
        partition_iter(RandomIt data_begin, RandomIt data_end, Iter kth_begin, Iter kth_end, Compare comp)
        {
            XTENSOR_ASSERT(std::distance(data_begin, data_end) >= 0);
            XTENSOR_ASSERT(std::distance(kth_begin, kth_end) >= 0);

            using idx_type = typename std::iterator_traits<Iter>::value_type;

            // Everything beyond the previously placed index is already
            // greater, so each pass only needs the prefix [begin, k_last).
            idx_type k_last = static_cast<idx_type>(std::distance(data_begin, data_end));
            for (; kth_begin != kth_end; ++kth_begin)
            {
                std::nth_element(data_begin, data_begin + *kth_begin, data_begin + k_last, std::move(comp));
                k_last = *kth_begin;
            }
        }
+
+        template <class RandomIt, class Iter>
+        inline void partition_iter(RandomIt data_begin, RandomIt data_end, Iter kth_begin, Iter kth_end)
+        {
+            return partition_iter(
+                std::move(data_begin),
+                std::move(data_end),
+                std::move(kth_begin),
+                std::move(kth_end),
+                [](const auto& x, const auto& y) -> bool
+                {
+                    return x < y;
+                }
+            );
+        }
+    }
+
    /**
     * Partially sort xexpression
     *
     * Partition shuffles the xexpression in a way so that the kth element
     * in the returned xexpression is in the place it would appear in a sorted
     * array and all elements smaller than this entry are placed (unsorted) before.
     *
     * The optional third parameter can either be an axis or ``xnone()`` in which case
     * the xexpression will be flattened.
     *
     * This function uses ``std::nth_element`` internally.
     *
     * @code{cpp}
     * xt::xarray<float> a = {1, 10, -10, 123};
     * std::cout << xt::partition(a, 0) << std::endl; // {-10, 1, 123, 10} the correct entry at index 0
     * std::cout << xt::partition(a, 3) << std::endl; // {1, 10, -10, 123} the correct entry at index 3
     * std::cout << xt::partition(a, {0, 3}) << std::endl; // {-10, 1, 10, 123} the correct entries at index 0 and 3
     * @endcode
     *
     * @ingroup xt_xsort
     * @param e input xexpression
     * @param kth_container a container of ``indices`` that should contain the correctly sorted value
     * @param axis either integer (default = -1) to sort along last axis or ``xnone()`` to flatten before
     * sorting
     *
     * @return partially sorted xcontainer
     */
    template <
        class E,
        class C,
        class R = detail::flatten_sort_result_type_t<E>,
        class = std::enable_if_t<!xtl::is_integral<C>::value, int>>
    inline R partition(const xexpression<E>& e, C kth_container, placeholders::xtuph /*ax*/)
    {
        const auto& de = e.derived_cast();

        R ev = R::from_shape({de.size()});
        // partition_iter consumes the indices in decreasing order, so sort
        // ascending here and iterate in reverse below.
        std::sort(kth_container.begin(), kth_container.end());

        std::copy(de.linear_cbegin(), de.linear_cend(), ev.linear_begin());  // flatten

        detail::partition_iter(ev.linear_begin(), ev.linear_end(), kth_container.rbegin(), kth_container.rend());

        return ev;
    }
+
    // Overload accepting a braced list of kth indices,
    // e.g. partition(a, {0, 3}, xnone()).
    template <class E, class I, std::size_t N, class R = detail::flatten_sort_result_type_t<E>>
    inline R partition(const xexpression<E>& e, const I (&kth_container)[N], placeholders::xtuph tag)
    {
        return partition(
            e,
            xtl::forward_sequence<std::array<std::size_t, N>, decltype(kth_container)>(kth_container),
            tag
        );
    }

    // Overload accepting a single kth index.
    template <class E, class R = detail::flatten_sort_result_type_t<E>>
    inline R partition(const xexpression<E>& e, std::size_t kth, placeholders::xtuph tag)
    {
        return partition(e, std::array<std::size_t, 1>({kth}), tag);
    }
+
+    template <class E, class C, class = std::enable_if_t<!xtl::is_integral<C>::value, int>>
+    inline auto partition(const xexpression<E>& e, C kth_container, std::ptrdiff_t axis = -1)
+    {
+        using eval_type = typename detail::sort_eval_type<E>::type;
+
+        std::sort(kth_container.begin(), kth_container.end());
+
+        return detail::map_axis<eval_type>(
+            e.derived_cast(),
+            axis,
+            [&kth_container](auto begin, auto end)
+            {
+                detail::partition_iter(begin, end, kth_container.rbegin(), kth_container.rend());
+            }
+        );
+    }
+
    // Overload accepting a braced list of kth indices, e.g. partition(a, {0, 3}).
    template <class E, class T, std::size_t N>
    inline auto partition(const xexpression<E>& e, const T (&kth_container)[N], std::ptrdiff_t axis = -1)
    {
        return partition(
            e,
            xtl::forward_sequence<std::array<std::size_t, N>, decltype(kth_container)>(kth_container),
            axis
        );
    }

    // Overload accepting a single kth index.
    template <class E>
    inline auto partition(const xexpression<E>& e, std::size_t kth, std::ptrdiff_t axis = -1)
    {
        return partition(e, std::array<std::size_t, 1>({kth}), axis);
    }
+
    /**
     * Partially sort arguments
     *
     * Argpartition shuffles the indices to a xexpression in a way so that the index for the
     * kth element in the returned xexpression is in the place it would appear in a sorted
     * array and all elements smaller than this entry are placed (unsorted) before.
     *
     * The optional third parameter can either be an axis or ``xnone()`` in which case
     * the xexpression will be flattened.
     *
     * This function uses ``std::nth_element`` internally.
     *
     * @code{cpp}
     * xt::xarray<float> a = {1, 10, -10, 123};
     * std::cout << xt::argpartition(a, 0) << std::endl; // {2, 0, 3, 1} the correct entry at index 0
     * std::cout << xt::argpartition(a, 3) << std::endl; // {0, 1, 2, 3} the correct entry at index 3
     * std::cout << xt::argpartition(a, {0, 3}) << std::endl; // {2, 0, 1, 3} the correct entries at index 0 and 3
     * @endcode
     *
     * @ingroup xt_xsort
     * @param e input xexpression
     * @param kth_container a container of ``indices`` that should contain the correctly sorted value
     * @param axis either integer (default = -1) to sort along last axis or ``xnone()`` to flatten before
     * sorting
     *
     * @return xcontainer with indices of partial sort of input
     */
    template <
        class E,
        class C,
        class R = typename detail::linear_argsort_result_type<typename detail::sort_eval_type<E>::type>::type,
        class = std::enable_if_t<!xtl::is_integral<C>::value, int>>
    inline R argpartition(const xexpression<E>& e, C kth_container, placeholders::xtuph)
    {
        using eval_type = typename detail::sort_eval_type<E>::type;
        using result_type = typename detail::linear_argsort_result_type<eval_type>::type;

        const auto& de = e.derived_cast();

        result_type res = result_type::from_shape({de.size()});

        // partition_iter consumes the indices in decreasing order, so sort
        // ascending here and iterate in reverse below.
        std::sort(kth_container.begin(), kth_container.end());

        // Partition the identity permutation, comparing through the data.
        std::iota(res.linear_begin(), res.linear_end(), 0);

        detail::partition_iter(
            res.linear_begin(),
            res.linear_end(),
            kth_container.rbegin(),
            kth_container.rend(),
            [&de](std::size_t a, std::size_t b)
            {
                return de[a] < de[b];
            }
        );

        return res;
    }
+
    // Overload accepting a braced list of kth indices,
    // e.g. argpartition(a, {0, 3}, xnone()).
    template <class E, class I, std::size_t N>
    inline auto argpartition(const xexpression<E>& e, const I (&kth_container)[N], placeholders::xtuph tag)
    {
        return argpartition(
            e,
            xtl::forward_sequence<std::array<std::size_t, N>, decltype(kth_container)>(kth_container),
            tag
        );
    }

    // Overload accepting a single kth index.
    template <class E>
    inline auto argpartition(const xexpression<E>& e, std::size_t kth, placeholders::xtuph tag)
    {
        return argpartition(e, std::array<std::size_t, 1>({kth}), tag);
    }
+
    // Argpartition along an axis: for each 1-D lane, computes the index
    // permutation that places the requested kth entries at their sorted
    // positions (see the flat overload above for semantics).
    template <class E, class C, class = std::enable_if_t<!xtl::is_integral<C>::value, int>>
    inline auto argpartition(const xexpression<E>& e, C kth_container, std::ptrdiff_t axis = -1)
    {
        using eval_type = typename detail::sort_eval_type<E>::type;
        using result_type = typename detail::argsort_result_type<eval_type>::type;

        const auto& de = e.derived_cast();

        // 1-D input degenerates to the flat overload (same result type when
        // the rank is 1).
        if (de.dimension() == 1)
        {
            return argpartition<E, C, result_type>(e, std::forward<C>(kth_container), xnone());
        }

        // Ascending order here; partition_iter walks the indices in reverse.
        std::sort(kth_container.begin(), kth_container.end());
        // Lane-wise kernel: partition the identity permutation of the index
        // lane, comparing through the matching data lane.
        const auto argpartition_w_kth =
            [&kth_container](auto res_begin, auto res_end, auto ev_begin, auto /*ev_end*/)
        {
            std::iota(res_begin, res_end, 0);
            detail::partition_iter(
                res_begin,
                res_end,
                kth_container.rbegin(),
                kth_container.rend(),
                [&ev_begin](auto const& i, auto const& j)
                {
                    return *(ev_begin + i) < *(ev_begin + j);
                }
            );
        };

        // Fast path: the requested axis is already contiguous.
        const std::size_t ax = normalize_axis(de.dimension(), axis);
        if (ax == detail::leading_axis(de))
        {
            result_type res = result_type::from_shape(de.shape());
            detail::call_over_leading_axis(res, de, argpartition_w_kth);
            return res;
        }

        // Otherwise transpose `ax` into leading position, process lane by
        // lane, and transpose the index array back.
        dynamic_shape<std::size_t> permutation, reverse_permutation;
        std::tie(permutation, reverse_permutation) = detail::get_permutations(de.dimension(), ax, de.layout());
        eval_type ev = transpose(de, permutation);
        result_type res = result_type::from_shape(ev.shape());
        detail::call_over_leading_axis(res, ev, argpartition_w_kth);
        res = transpose(res, reverse_permutation);
        return res;
    }
+
    // Overload accepting a braced list of kth indices, e.g. argpartition(a, {0, 3}).
    template <class E, class I, std::size_t N>
    inline auto argpartition(const xexpression<E>& e, const I (&kth_container)[N], std::ptrdiff_t axis = -1)
    {
        return argpartition(
            e,
            xtl::forward_sequence<std::array<std::size_t, N>, decltype(kth_container)>(kth_container),
            axis
        );
    }

    // Overload accepting a single kth index.
    template <class E>
    inline auto argpartition(const xexpression<E>& e, std::size_t kth, std::ptrdiff_t axis = -1)
    {
        return argpartition(e, std::array<std::size_t, 1>({kth}), axis);
    }
+
+    /******************
+     *  xt::quantile  *
+     ******************/
+
+    namespace detail
+    {
+        template <class S, class I, class K, class O>
+        inline void select_indices_impl(
+            const S& shape,
+            const I& indices,
+            std::size_t axis,
+            std::size_t current_dim,
+            const K& current_index,
+            O& out
+        )
+        {
+            using id_t = typename K::value_type;
+            if ((current_dim < shape.size() - 1) && (current_dim == axis))
+            {
+                for (auto i : indices)
+                {
+                    auto idx = current_index;
+                    idx[current_dim] = i;
+                    select_indices_impl(shape, indices, axis, current_dim + 1, idx, out);
+                }
+            }
+            else if ((current_dim < shape.size() - 1) && (current_dim != axis))
+            {
+                for (id_t i = 0; xtl::cmp_less(i, shape[current_dim]); ++i)
+                {
+                    auto idx = current_index;
+                    idx[current_dim] = i;
+                    select_indices_impl(shape, indices, axis, current_dim + 1, idx, out);
+                }
+            }
+            else if ((current_dim == shape.size() - 1) && (current_dim == axis))
+            {
+                for (auto i : indices)
+                {
+                    auto idx = current_index;
+                    idx[current_dim] = i;
+                    out.push_back(std::move(idx));
+                }
+            }
+            else if ((current_dim == shape.size() - 1) && (current_dim != axis))
+            {
+                for (id_t i = 0; xtl::cmp_less(i, shape[current_dim]); ++i)
+                {
+                    auto idx = current_index;
+                    idx[current_dim] = i;
+                    out.push_back(std::move(idx));
+                }
+            }
+        }
+
+        template <class S, class I>
+        inline auto select_indices(const S& shape, const I& indices, std::size_t axis)
+        {
+            using index_type = get_strides_t<S>;
+            auto out = std::vector<index_type>();
+            select_indices_impl(shape, indices, axis, 0, xtl::make_sequence<index_type>(shape.size()), out);
+            return out;
+        }
+
        // TODO remove when fancy index views are implemented
        // Poor man's indexing along a single axis as in NumPy a[:, [1, 3, 4]]:
        // gathers every element whose coordinate on `axis` is in `indices`
        // through a flat index_view, then reshapes back to the original
        // shape with `axis` resized to indices.size().
        template <class E, class I>
        inline auto fancy_indexing(E&& e, const I& indices, std::ptrdiff_t axis)
        {
            const std::size_t ax = normalize_axis(e.dimension(), axis);
            using shape_t = get_strides_t<typename std::decay_t<E>::shape_type>;
            auto shape = xtl::forward_sequence<shape_t, decltype(e.shape())>(e.shape());
            shape[ax] = indices.size();
            return reshape_view(
                index_view(std::forward<E>(e), select_indices(e.shape(), indices, ax)),
                std::move(shape)
            );
        }
+
        // Computes the interpolation data for quantiles over a distribution
        // of size n: for each probability, the two neighboring element ranks
        // (k, k+1) and the matching interpolation weights (1-gamma, gamma),
        // following the alpha/beta parameterization of Hyndman & Fan (1996).
        template <class T, class I, class P>
        inline auto quantile_kth_gamma(std::size_t n, const P& probas, T alpha, T beta)
        {
            const auto m = alpha + probas * (T(1) - alpha - beta);
            // Evaluating since reused a lot
            const auto p_n_m = eval(probas * static_cast<T>(n) + m - 1);
            // Previous (virtual) index, may be out of bounds
            const auto j = floor(p_n_m);
            const auto j_jp1 = concatenate(xtuple(j, j + 1));
            // Both interpolation indices, k and k+1, clamped into [0, n-1]
            const auto k_kp1 = xt::cast<std::size_t>(clip(j_jp1, T(0), T(n - 1)));
            // Both interpolation coefficients, 1-gamma and gamma
            const auto omg_g = concatenate(xtuple(T(1) - (p_n_m - j), p_n_m - j));
            return std::make_pair(eval(k_kp1), eval(omg_g));
        }
+
+        // TODO should implement unsqueeze rather
+        // Return a copy of `shape` (as an svector) with an extra dimension of
+        // length 1 inserted at position `axis`; axis == shape.size() appends.
+        template <class S>
+        inline auto unsqueeze_shape(const S& shape, std::size_t axis)
+        {
+            XTENSOR_ASSERT(axis <= shape.size());
+            auto new_shape = xtl::forward_sequence<xt::svector<std::size_t>, decltype(shape)>(shape);
+            new_shape.insert(new_shape.begin() + axis, 1);
+            return new_shape;
+        }
+    }
+
+    /**
+     * Compute quantiles over the given axis.
+     *
+     * In a sorted array representing a distribution of numbers, the quantile of a probability ``p``
+     * is the cut value ``q`` such that a fraction ``p`` of the distribution is less than or equal
+     * to ``q``.
+     * When the cutpoint falls between two elements of the sample distribution, an interpolation is
+     * computed using the @p alpha and @p beta coefficients, as described in
+     * (Hyndman and Fan, 1996).
+     *
+     * The algorithm partially sorts entries in a copy along the @p axis axis.
+     *
+     * @ingroup xt_xsort
+     * @param e Expression containing the distribution over which the quantiles are computed.
+     * @param probas A list of probabilities, one per desired quantile.
+     *        All elements must be in the range ``[0, 1]``.
+     * @param axis The dimension in which to compute the quantiles, *i.e* the axis representing the
+     *        distribution.
+     * @param alpha Interpolation parameter. Must be in the range ``[0, 1]``.
+     * @param beta Interpolation parameter. Must be in the range ``[0, 1]``.
+     * @tparam T The type in which the quantiles are computed.
+     * @return An expression with as many dimensions as the input @p e.
+     *         The first axis corresponds to the quantiles.
+     *         The other axes are the axes that remain after the reduction of @p e.
+     * @see (Hyndman and Fan, 1996) R. J. Hyndman and Y. Fan,
+     *      "Sample quantiles in statistical packages", The American Statistician,
+     *      50(4), pp. 361-365, 1996
+     * @see https://en.wikipedia.org/wiki/Quantile
+     */
+    template <class T = double, class E, class P>
+    inline auto quantile(E&& e, const P& probas, std::ptrdiff_t axis, T alpha, T beta)
+    {
+        XTENSOR_ASSERT(all(0. <= probas));
+        XTENSOR_ASSERT(all(probas <= 1.));
+        XTENSOR_ASSERT(0. <= alpha);
+        XTENSOR_ASSERT(alpha <= 1.);
+        XTENSOR_ASSERT(0. <= beta);
+        XTENSOR_ASSERT(beta <= 1.);
+
+        using tmp_shape_t = get_strides_t<typename std::decay_t<E>::shape_type>;
+        using id_t = typename tmp_shape_t::value_type;
+
+        const std::size_t ax = normalize_axis(e.dimension(), axis);
+        const std::size_t n = e.shape()[ax];
+        auto kth_gamma = detail::quantile_kth_gamma<T, id_t, P>(n, probas, alpha, beta);
+
+        // Select relevant values for computing interpolating quantiles
+        auto e_partition = xt::partition(std::forward<E>(e), kth_gamma.first, ax);
+        auto e_kth = detail::fancy_indexing(std::move(e_partition), std::move(kth_gamma.first), ax);
+
+        // Reshape interpolation coefficients so they broadcast against e_kth
+        auto gm1_g_shape = xtl::make_sequence<tmp_shape_t>(e.dimension(), 1);
+        gm1_g_shape[ax] = kth_gamma.second.size();
+        auto gm1_g_reshaped = reshape_view(std::move(kth_gamma.second), std::move(gm1_g_shape));
+
+        // Compute interpolation
+        // TODO(C++20) use (and create) xt::lerp in C++
+        auto e_kth_g = std::move(e_kth) * std::move(gm1_g_reshaped);
+        // Reshape the pairwise interpolands for summing along the new axis
+        auto e_kth_g_shape = detail::unsqueeze_shape(e_kth_g.shape(), ax);
+        e_kth_g_shape[ax] = 2;
+        e_kth_g_shape[ax + 1] /= 2;
+        auto quantiles = xt::sum(reshape_view(std::move(e_kth_g), std::move(e_kth_g_shape)), ax);
+        // Cannot do a transpose on a non-strided expression so we have to eval
+        return moveaxis(eval(std::move(quantiles)), ax, 0);
+    }
+
+    // Static proba array overload: lets callers pass a braced C-array literal
+    // of probabilities, e.g. quantile(e, {0.25, 0.5}, axis, alpha, beta).
+    template <class T = double, class E, std::size_t N>
+    inline auto quantile(E&& e, const T (&probas)[N], std::ptrdiff_t axis, T alpha, T beta)
+    {
+        return quantile(std::forward<E>(e), adapt(probas, {N}), axis, alpha, beta);
+    }
+
+    /**
+     * Compute quantiles of the whole expression.
+     *
+     * The quantiles are computed over the whole expression, as if flattened into a
+     * one-dimensional expression.
+     *
+     * @ingroup xt_xsort
+     * @see xt::quantile(E&& e, P const& probas, std::ptrdiff_t axis, T alpha, T beta)
+     */
+    template <class T = double, class E, class P>
+    inline auto quantile(E&& e, const P& probas, T alpha, T beta)
+    {
+        return quantile(xt::ravel(std::forward<E>(e)), probas, 0, alpha, beta);
+    }
+
+    // Static proba array overload of the flattened-expression quantile,
+    // accepting a braced C-array literal of probabilities.
+    template <class T = double, class E, std::size_t N>
+    inline auto quantile(E&& e, const T (&probas)[N], T alpha, T beta)
+    {
+        return quantile(std::forward<E>(e), adapt(probas, {N}), alpha, beta);
+    }
+
+    /**
+     * Quantile interpolation method.
+     *
+     * Predefined methods for interpolating quantiles, as defined in (Hyndman and Fan, 1996).
+     * The enumerator values follow the paper's numbering: the first enumerator is
+     * explicitly set to 4, so the subsequent ones take the values 5 through 9.
+     *
+     * @ingroup xt_xsort
+     * @see (Hyndman and Fan, 1996) R. J. Hyndman and Y. Fan,
+     *      "Sample quantiles in statistical packages", The American Statistician,
+     *      50(4), pp. 361-365, 1996
+     * @see xt::quantile(E&& e, P const& probas, std::ptrdiff_t axis, xt::quantile_method method)
+     */
+    enum class quantile_method
+    {
+        /** Method 4 of (Hyndman and Fan, 1996) with ``alpha=0`` and ``beta=1``. */
+        interpolated_inverted_cdf = 4,
+        /** Method 5 of (Hyndman and Fan, 1996) with ``alpha=1/2`` and ``beta=1/2``. */
+        hazen,
+        /** Method 6 of (Hyndman and Fan, 1996) with ``alpha=0`` and ``beta=0``. */
+        weibull,
+        /** Method 7 of (Hyndman and Fan, 1996) with ``alpha=1`` and ``beta=1``. */
+        linear,
+        /** Method 8 of (Hyndman and Fan, 1996) with ``alpha=1/3`` and ``beta=1/3``. */
+        median_unbiased,
+        /** Method 9 of (Hyndman and Fan, 1996) with ``alpha=3/8`` and ``beta=3/8``. */
+        normal_unbiased,
+    };
+
+    /**
+     * Compute quantiles over the given axis.
+     *
+     * The function takes the name of a predefined method used to interpolate between values.
+     *
+     * @ingroup xt_xsort
+     * @see xt::quantile_method
+     * @see xt::quantile(E&& e, P const& probas, std::ptrdiff_t axis, T alpha, T beta)
+     */
+    template <class T = double, class E, class P>
+    inline auto
+    quantile(E&& e, const P& probas, std::ptrdiff_t axis, quantile_method method = quantile_method::linear)
+    {
+        // Pre-initialized so an out-of-enum `method` value falls through to
+        // alpha = beta = 0 (the switch covers every declared enumerator).
+        T alpha = 0.;
+        T beta = 0.;
+        switch (method)
+        {
+            case (quantile_method::interpolated_inverted_cdf):
+            {
+                alpha = 0.;
+                beta = 1.;
+                break;
+            }
+            case (quantile_method::hazen):
+            {
+                alpha = 0.5;
+                beta = 0.5;
+                break;
+            }
+            case (quantile_method::weibull):
+            {
+                alpha = 0.;
+                beta = 0.;
+                break;
+            }
+            case (quantile_method::linear):
+            {
+                alpha = 1.;
+                beta = 1.;
+                break;
+            }
+            case (quantile_method::median_unbiased):
+            {
+                alpha = 1. / 3.;
+                beta = 1. / 3.;
+                break;
+            }
+            case (quantile_method::normal_unbiased):
+            {
+                alpha = 3. / 8.;
+                beta = 3. / 8.;
+                break;
+            }
+        }
+        return quantile(std::forward<E>(e), probas, axis, alpha, beta);
+    }
+
+    // Static proba array overload of the method-based quantile, accepting a
+    // braced C-array literal of probabilities.
+    template <class T = double, class E, std::size_t N>
+    inline auto
+    quantile(E&& e, const T (&probas)[N], std::ptrdiff_t axis, quantile_method method = quantile_method::linear)
+    {
+        return quantile(std::forward<E>(e), adapt(probas, {N}), axis, method);
+    }
+
+    /**
+     * Compute quantiles of the whole expression.
+     *
+     * The quantiles are computed over the whole expression, as if flattened into a
+     * one-dimensional expression.
+     * The function takes the name of a predefined method used to interpolate between values.
+     *
+     * @ingroup xt_xsort
+     * @see xt::quantile_method
+     * @see xt::quantile(E&& e, P const& probas, std::ptrdiff_t axis, xt::quantile_method method)
+     */
+    template <class T = double, class E, class P>
+    inline auto quantile(E&& e, const P& probas, quantile_method method = quantile_method::linear)
+    {
+        return quantile(xt::ravel(std::forward<E>(e)), probas, 0, method);
+    }
+
+    // Static proba array overload of the flattened, method-based quantile,
+    // accepting a braced C-array literal of probabilities.
+    template <class T = double, class E, std::size_t N>
+    inline auto quantile(E&& e, const T (&probas)[N], quantile_method method = quantile_method::linear)
+    {
+        return quantile(std::forward<E>(e), adapt(probas, {N}), method);
+    }
+
+    /****************
+     *  xt::median  *
+     ****************/
+
+    /**
+     * Find the median of a flattened expression.
+     *
+     * The median is the middle value of a sorted copy of the flattened input
+     * when its size is odd, and the average of the two middle values when it
+     * is even.  Only a partial sort of a copy is performed (xt::partition).
+     *
+     * @ingroup xt_xsort
+     * @param e input xexpression
+     * @return median value
+     */
+    template <class E>
+    inline typename std::decay_t<E>::value_type median(E&& e)
+    {
+        using value_type = typename std::decay_t<E>::value_type;
+        auto sz = e.size();
+        if (sz % 2 == 0)
+        {
+            // Even size: average the two middle elements.
+            std::size_t szh = sz / 2;  // integer floor div
+            std::array<std::size_t, 2> kth = {szh - 1, szh};
+            auto values = xt::partition(xt::flatten(e), kth);
+            return (values[kth[0]] + values[kth[1]]) / value_type(2);
+        }
+        else
+        {
+            // Odd size: the single middle element is the median.
+            std::array<std::size_t, 1> kth = {(sz - 1) / 2};
+            auto values = xt::partition(xt::flatten(e), kth);
+            return values[kth[0]];
+        }
+    }
+
+    /**
+     * Find the median along the specified axis
+     *
+     * Given a vector V of length N, the median of V is the middle value of a
+     * sorted copy of V, V_sorted - i.e., V_sorted[(N-1)/2], when N is odd,
+     * and the average of the two middle values of V_sorted when N is even.
+     *
+     * @ingroup xt_xsort
+     * @param e input xexpression
+     * @param axis axis along which the medians are computed.
+     *             If not set, computes the median along a flattened version of the input.
+     * @return median value
+     */
+    template <class E>
+    inline auto median(E&& e, std::ptrdiff_t axis)
+    {
+        std::size_t ax = normalize_axis(e.dimension(), axis);
+        std::size_t sz = e.shape()[ax];
+        xstrided_slice_vector sv(e.dimension(), xt::all());
+
+        if (sz % 2 == 0)
+        {
+            std::size_t szh = sz / 2;  // integer floor div
+            std::array<std::size_t, 2> kth = {szh - 1, szh};
+            auto values = xt::partition(std::forward<E>(e), kth, static_cast<ptrdiff_t>(ax));
+            // Keep the two middle entries along `ax` and average them.
+            sv[ax] = xt::range(szh - 1, szh + 1);
+            return xt::mean(xt::strided_view(std::move(values), std::move(sv)), {ax});
+        }
+        else
+        {
+            std::size_t szh = (sz - 1) / 2;
+            std::array<std::size_t, 1> kth = {(sz - 1) / 2};
+            auto values = xt::partition(std::forward<E>(e), kth, static_cast<ptrdiff_t>(ax));
+            // Keep the single middle entry (mean over a length-1 slice).
+            sv[ax] = xt::range(szh, szh + 1);
+            return xt::mean(xt::strided_view(std::move(values), std::move(sv)), {ax});
+        }
+    }
+
+    namespace detail
+    {
+        // Maps the input expression type to the container returned by
+        // argmin/argmax: a dynamic-rank xarray<std::size_t> in general...
+        template <class T>
+        struct argfunc_result_type
+        {
+            using type = xarray<std::size_t>;
+        };
+
+        // ...and a fixed-rank xtensor with one dimension fewer for xtensor inputs.
+        template <class T, std::size_t N>
+        struct argfunc_result_type<xtensor<T, N>>
+        {
+            using type = xtensor<std::size_t, N - 1>;
+        };
+
+        // Shared implementation of argmin/argmax along `axis` (assumed already
+        // normalized).  `cmp` selects the extremum: std::less -> argmin,
+        // std::greater -> argmax.  Since the comparison is strict, ties resolve
+        // to the first extremal element in traversal order.
+        template <layout_type L, class E, class F>
+        inline typename argfunc_result_type<E>::type arg_func_impl(const E& e, std::size_t axis, F&& cmp)
+        {
+            using eval_type = typename detail::sort_eval_type<E>::type;
+            using value_type = typename E::value_type;
+            using result_type = typename argfunc_result_type<E>::type;
+            using result_shape_type = typename result_type::shape_type;
+
+            // 1-D input: reduce over the whole range and return a 0-d result.
+            if (e.dimension() == 1)
+            {
+                auto begin = e.template begin<L>();
+                auto end = e.template end<L>();
+                // todo C++17 : constexpr
+                if (std::is_same<F, std::less<value_type>>::value)
+                {
+                    std::size_t i = static_cast<std::size_t>(std::distance(begin, std::min_element(begin, end)));
+                    return xtensor<size_t, 0>{i};
+                }
+                else
+                {
+                    std::size_t i = static_cast<std::size_t>(std::distance(begin, std::max_element(begin, end)));
+                    return xtensor<size_t, 0>{i};
+                }
+            }
+
+            result_shape_type alt_shape;
+            xt::resize_container(alt_shape, e.dimension() - 1);
+
+            // Result shape: the input shape with the reduced axis removed.
+            std::copy(e.shape().cbegin(), e.shape().cbegin() + std::ptrdiff_t(axis), alt_shape.begin());
+            std::copy(
+                e.shape().cbegin() + std::ptrdiff_t(axis) + 1,
+                e.shape().cend(),
+                alt_shape.begin() + std::ptrdiff_t(axis)
+            );
+
+            result_type result = result_type::from_shape(std::move(alt_shape));
+            auto result_iter = result.template begin<L>();
+
+            // Scans one 1-D lane [begin, end) and writes the index of its
+            // extremum to the next slot of `result`.
+            auto arg_func_lambda = [&result_iter, &cmp](auto begin, auto end)
+            {
+                std::size_t idx = 0;
+                value_type val = *begin;
+                ++begin;
+                for (std::size_t i = 1; begin != end; ++begin, ++i)
+                {
+                    if (cmp(*begin, val))
+                    {
+                        val = *begin;
+                        idx = i;
+                    }
+                }
+                *result_iter = idx;
+                ++result_iter;
+            };
+
+            // If `axis` is not already the leading axis, transpose so the
+            // reduction runs over contiguous leading-axis lanes.
+            if (axis != detail::leading_axis(e))
+            {
+                dynamic_shape<std::size_t> permutation, reverse_permutation;
+                std::tie(
+                    permutation,
+                    reverse_permutation
+                ) = detail::get_permutations(e.dimension(), axis, e.layout());
+
+                // note: creating copy
+                eval_type input = transpose(e, permutation);
+                detail::call_over_leading_axis(input, arg_func_lambda);
+                return result;
+            }
+            else
+            {
+                auto&& input = eval(e);
+                detail::call_over_leading_axis(input, arg_func_lambda);
+                return result;
+            }
+        }
+    }
+
+    /**
+     * Find the position of the minimal value in an xexpression.
+     * The returned index is into the array flattened in <L> traversal order.
+     *
+     * @ingroup xt_xsort
+     * @param e input xexpression
+     * @return a 0-d tensor holding the flat position of the minimal value
+     */
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E>
+    inline auto argmin(const xexpression<E>& e)
+    {
+        // NOTE(review): value_type is unused in this overload.
+        using value_type = typename E::value_type;
+        auto&& ed = eval(e.derived_cast());
+        auto begin = ed.template begin<L>();
+        auto end = ed.template end<L>();
+        std::size_t i = static_cast<std::size_t>(std::distance(begin, std::min_element(begin, end)));
+        return xtensor<size_t, 0>{i};
+    }
+
+    /**
+     * Find position of minimal value in xexpression.
+     * By default, the returned index is into the flattened array.
+     * If `axis` is specified, the indices are along the specified axis.
+     *
+     * @ingroup xt_xsort
+     * @param e input xexpression
+     * @param axis select axis (optional)
+     *
+     * @return returns xarray with positions of minimal value
+     */
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E>
+    inline auto argmin(const xexpression<E>& e, std::ptrdiff_t axis)
+    {
+        using value_type = typename E::value_type;
+        auto&& ed = eval(e.derived_cast());
+        std::size_t ax = normalize_axis(ed.dimension(), axis);
+        return detail::arg_func_impl<L>(ed, ax, std::less<value_type>());
+    }
+
+    /**
+     * Find the position of the maximal value in an xexpression.
+     * The returned index is into the array flattened in <L> traversal order.
+     *
+     * @ingroup xt_xsort
+     * @param e input xexpression
+     * @return a 0-d tensor holding the flat position of the maximal value
+     */
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E>
+    inline auto argmax(const xexpression<E>& e)
+    {
+        // NOTE(review): value_type is unused in this overload.
+        using value_type = typename E::value_type;
+        auto&& ed = eval(e.derived_cast());
+        auto begin = ed.template begin<L>();
+        auto end = ed.template end<L>();
+        std::size_t i = static_cast<std::size_t>(std::distance(begin, std::max_element(begin, end)));
+        return xtensor<size_t, 0>{i};
+    }
+
+    /**
+     * Find position of maximal value in xexpression
+     * By default, the returned index is into the flattened array.
+     * If `axis` is specified, the indices are along the specified axis.
+     *
+     * @ingroup xt_xsort
+     * @param e input xexpression
+     * @param axis select axis (optional)
+     *
+     * @return returns xarray with positions of maximal value
+     */
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E>
+    inline auto argmax(const xexpression<E>& e, std::ptrdiff_t axis)
+    {
+        using value_type = typename E::value_type;
+        auto&& ed = eval(e.derived_cast());
+        std::size_t ax = normalize_axis(ed.dimension(), axis);
+        return detail::arg_func_impl<L>(ed, ax, std::greater<value_type>());
+    }
+
+    /**
+     * Find unique elements of a xexpression. This returns a flattened xtensor with
+     * sorted, unique elements from the original expression.
+     *
+     * @ingroup xt_xsort
+     * @param e input xexpression (will be flattened)
+     */
+    template <class E>
+    inline auto unique(const xexpression<E>& e)
+    {
+        // Sort a flattened copy, then deduplicate adjacent equal elements.
+        auto sorted = sort(e, xnone());
+        auto end = std::unique(sorted.begin(), sorted.end());
+        std::size_t sz = static_cast<std::size_t>(std::distance(sorted.begin(), end));
+        // TODO check if we can shrink the vector without reallocation
+        using value_type = typename E::value_type;
+        auto result = xtensor<value_type, 1>::from_shape({sz});
+        std::copy(sorted.begin(), end, result.begin());
+        return result;
+    }
+
+    /**
+     * Find the set difference of two xexpressions. This returns a flattened xtensor with
+     * the sorted, unique values in ar1 that are not in ar2.
+     *
+     * @ingroup xt_xsort
+     * @param ar1 input xexpression (will be flattened)
+     * @param ar2 input xexpression
+     */
+    template <class E1, class E2>
+    inline auto setdiff1d(const xexpression<E1>& ar1, const xexpression<E2>& ar2)
+    {
+        using value_type = typename E1::value_type;
+
+        auto unique1 = unique(ar1);
+        auto unique2 = unique(ar2);
+
+        // unique1.size() is an upper bound on the size of the difference.
+        auto tmp = xtensor<value_type, 1>::from_shape({unique1.size()});
+
+        auto end = std::set_difference(unique1.begin(), unique1.end(), unique2.begin(), unique2.end(), tmp.begin());
+
+        std::size_t sz = static_cast<std::size_t>(std::distance(tmp.begin(), end));
+
+        // Copy into a tightly-sized result.
+        auto result = xtensor<value_type, 1>::from_shape({sz});
+
+        std::copy(tmp.begin(), end, result.begin());
+
+        return result;
+    }
+}
+
+#endif

+ 1984 - 0
3rd/numpy/include/xtensor/xstorage.hpp

@@ -0,0 +1,1984 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_STORAGE_HPP
+#define XTENSOR_STORAGE_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+
+#include "xexception.hpp"
+#include "xtensor_config.hpp"
+#include "xtensor_simd.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+
+    namespace detail
+    {
+        // SFINAE helper: well-formed only when It qualifies at least as an
+        // input iterator (its iterator_category converts to input_iterator_tag).
+        template <class It>
+        using require_input_iter = typename std::enable_if<
+            std::is_convertible<typename std::iterator_traits<It>::iterator_category, std::input_iterator_tag>::value>::type;
+    }
+
+    // Trait: whether container C stores its elements contiguously.
+    // Defaults to true; specialize to false for non-contiguous containers.
+    template <class C>
+    struct is_contiguous_container : std::true_type
+    {
+    };
+
+    /**
+     * A vector-like contiguous container tailored for dense tensor storage.
+     *
+     * Differences from std::vector (see the member definitions below):
+     * - elements of trivially default-constructible types are left
+     *   uninitialized on sized construction and resize;
+     * - resize() discards the previous contents instead of preserving them;
+     * - no spare capacity is kept: reserve() and shrink_to_fit() are no-ops
+     *   and capacity() always equals size().
+     */
+    template <class T, class A = std::allocator<T>>
+    class uvector
+    {
+    public:
+
+        using allocator_type = A;
+
+        using value_type = typename std::allocator_traits<A>::value_type;
+        using reference = value_type&;
+        using const_reference = const value_type&;
+        using pointer = typename std::allocator_traits<A>::pointer;
+        using const_pointer = typename std::allocator_traits<A>::const_pointer;
+
+        using size_type = typename std::allocator_traits<A>::size_type;
+        using difference_type = typename std::allocator_traits<A>::difference_type;
+
+        // Iterators are raw pointers into the contiguous buffer.
+        using iterator = pointer;
+        using const_iterator = const_pointer;
+        using reverse_iterator = std::reverse_iterator<iterator>;
+        using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+        uvector() noexcept;
+        explicit uvector(const allocator_type& alloc) noexcept;
+        explicit uvector(size_type count, const allocator_type& alloc = allocator_type());
+        uvector(size_type count, const_reference value, const allocator_type& alloc = allocator_type());
+
+        template <class InputIt, class = detail::require_input_iter<InputIt>>
+        uvector(InputIt first, InputIt last, const allocator_type& alloc = allocator_type());
+
+        uvector(std::initializer_list<T> init, const allocator_type& alloc = allocator_type());
+
+        ~uvector();
+
+        uvector(const uvector& rhs);
+        uvector(const uvector& rhs, const allocator_type& alloc);
+        uvector& operator=(const uvector&);
+
+        uvector(uvector&& rhs) noexcept;
+        uvector(uvector&& rhs, const allocator_type& alloc) noexcept;
+        uvector& operator=(uvector&& rhs) noexcept;
+
+        allocator_type get_allocator() const noexcept;
+
+        bool empty() const noexcept;
+        size_type size() const noexcept;
+        void resize(size_type size);
+        size_type max_size() const noexcept;
+        void reserve(size_type new_cap);
+        size_type capacity() const noexcept;
+        void shrink_to_fit();
+        void clear();
+
+        reference operator[](size_type i);
+        const_reference operator[](size_type i) const;
+
+        reference at(size_type i);
+        const_reference at(size_type i) const;
+
+        reference front();
+        const_reference front() const;
+
+        reference back();
+        const_reference back() const;
+
+        pointer data() noexcept;
+        const_pointer data() const noexcept;
+
+        iterator begin() noexcept;
+        iterator end() noexcept;
+
+        const_iterator begin() const noexcept;
+        const_iterator end() const noexcept;
+
+        const_iterator cbegin() const noexcept;
+        const_iterator cend() const noexcept;
+
+        reverse_iterator rbegin() noexcept;
+        reverse_iterator rend() noexcept;
+
+        const_reverse_iterator rbegin() const noexcept;
+        const_reverse_iterator rend() const noexcept;
+
+        const_reverse_iterator crbegin() const noexcept;
+        const_reverse_iterator crend() const noexcept;
+
+        void swap(uvector& rhs) noexcept;
+
+    private:
+
+        template <class I>
+        void init_data(I first, I last);
+
+        void resize_impl(size_type new_size);
+
+        allocator_type m_allocator;
+
+        // Storing a pair of pointers is more efficient for iterating than
+        // storing a pointer to the beginning and the size of the container
+        pointer p_begin;
+        pointer p_end;
+    };
+
+    // Free comparison operators for uvector (element-wise equality and
+    // lexicographical ordering) and the ADL swap overload.
+    template <class T, class A>
+    bool operator==(const uvector<T, A>& lhs, const uvector<T, A>& rhs);
+
+    template <class T, class A>
+    bool operator!=(const uvector<T, A>& lhs, const uvector<T, A>& rhs);
+
+    template <class T, class A>
+    bool operator<(const uvector<T, A>& lhs, const uvector<T, A>& rhs);
+
+    template <class T, class A>
+    bool operator<=(const uvector<T, A>& lhs, const uvector<T, A>& rhs);
+
+    template <class T, class A>
+    bool operator>(const uvector<T, A>& lhs, const uvector<T, A>& rhs);
+
+    template <class T, class A>
+    bool operator>=(const uvector<T, A>& lhs, const uvector<T, A>& rhs);
+
+    template <class T, class A>
+    void swap(uvector<T, A>& lhs, uvector<T, A>& rhs) noexcept;
+
+    /**************************
+     * uvector implementation *
+     **************************/
+
+    namespace detail
+    {
+        // Allocate `size` elements with `alloc`.  Elements are value-constructed
+        // only when the value type is NOT trivially default-constructible;
+        // otherwise the memory is intentionally left uninitialized.
+        template <class A>
+        inline typename std::allocator_traits<A>::pointer
+        safe_init_allocate(A& alloc, typename std::allocator_traits<A>::size_type size)
+        {
+            using traits = std::allocator_traits<A>;
+            using pointer = typename traits::pointer;
+            using value_type = typename traits::value_type;
+            pointer res = alloc.allocate(size);
+            if (!xtrivially_default_constructible<value_type>::value)
+            {
+                for (pointer p = res; p != res + size; ++p)
+                {
+                    traits::construct(alloc, p, value_type());
+                }
+            }
+            return res;
+        }
+
+        // Destroy `size` elements (skipped for trivially default-constructible
+        // types, mirroring safe_init_allocate) and deallocate the buffer.
+        // No-op when `ptr` is null.
+        template <class A>
+        inline void safe_destroy_deallocate(
+            A& alloc,
+            typename std::allocator_traits<A>::pointer ptr,
+            typename std::allocator_traits<A>::size_type size
+        )
+        {
+            using traits = std::allocator_traits<A>;
+            using pointer = typename traits::pointer;
+            using value_type = typename traits::value_type;
+            if (ptr != nullptr)
+            {
+                if (!xtrivially_default_constructible<value_type>::value)
+                {
+                    for (pointer p = ptr; p != ptr + size; ++p)
+                    {
+                        traits::destroy(alloc, p);
+                    }
+                }
+                traits::deallocate(alloc, ptr, size);
+            }
+        }
+    }
+
+    // Allocate storage and copy-construct the range [first, last) into it.
+    // Assumes p_begin/p_end own no memory yet (called from constructors).
+    // NOTE(review): if an element copy throws, the allocation is leaked
+    // (uninitialized_copy cleans up elements, but nothing deallocates).
+    template <class T, class A>
+    template <class I>
+    inline void uvector<T, A>::init_data(I first, I last)
+    {
+        size_type size = static_cast<size_type>(std::distance(first, last));
+        if (size != size_type(0))
+        {
+            p_begin = m_allocator.allocate(size);
+            std::uninitialized_copy(first, last, p_begin);
+            p_end = p_begin + size;
+        }
+    }
+
+    // Reallocate to exactly `new_size` elements and release the old buffer.
+    // The previous contents are NOT copied over.  No-op when the size is
+    // already `new_size`.
+    template <class T, class A>
+    inline void uvector<T, A>::resize_impl(size_type new_size)
+    {
+        size_type old_size = size();
+        pointer old_begin = p_begin;
+        if (new_size != old_size)
+        {
+            p_begin = detail::safe_init_allocate(m_allocator, new_size);
+            p_end = p_begin + new_size;
+            detail::safe_destroy_deallocate(m_allocator, old_begin, old_size);
+        }
+    }
+
+    // Default constructor: empty container with a default allocator.
+    template <class T, class A>
+    inline uvector<T, A>::uvector() noexcept
+        : uvector(allocator_type())
+    {
+    }
+
+    // Construct empty with the given allocator.
+    template <class T, class A>
+    inline uvector<T, A>::uvector(const allocator_type& alloc) noexcept
+        : m_allocator(alloc)
+        , p_begin(nullptr)
+        , p_end(nullptr)
+    {
+    }
+
+    // Construct with `count` elements; trivially default-constructible value
+    // types are left uninitialized (see safe_init_allocate).
+    template <class T, class A>
+    inline uvector<T, A>::uvector(size_type count, const allocator_type& alloc)
+        : m_allocator(alloc)
+        , p_begin(nullptr)
+        , p_end(nullptr)
+    {
+        if (count != 0)
+        {
+            p_begin = detail::safe_init_allocate(m_allocator, count)
+            p_end = p_begin + count;
+        }
+    }
+
+    // Construct with `count` copies of `value`.
+    template <class T, class A>
+    inline uvector<T, A>::uvector(size_type count, const_reference value, const allocator_type& alloc)
+        : m_allocator(alloc)
+        , p_begin(nullptr)
+        , p_end(nullptr)
+    {
+        if (count != 0)
+        {
+            p_begin = m_allocator.allocate(count);
+            p_end = p_begin + count;
+            std::uninitialized_fill(p_begin, p_end, value);
+        }
+    }
+
+    // Construct from an input-iterator range [first, last).
+    template <class T, class A>
+    template <class InputIt, class>
+    inline uvector<T, A>::uvector(InputIt first, InputIt last, const allocator_type& alloc)
+        : m_allocator(alloc)
+        , p_begin(nullptr)
+        , p_end(nullptr)
+    {
+        init_data(first, last);
+    }
+
+    // Construct from an initializer list.
+    template <class T, class A>
+    inline uvector<T, A>::uvector(std::initializer_list<T> init, const allocator_type& alloc)
+        : m_allocator(alloc)
+        , p_begin(nullptr)
+        , p_end(nullptr)
+    {
+        init_data(init.begin(), init.end());
+    }
+
+    // Destructor: destroy elements (when needed) and free the buffer; the
+    // pointers are nulled defensively afterwards.
+    template <class T, class A>
+    inline uvector<T, A>::~uvector()
+    {
+        detail::safe_destroy_deallocate(m_allocator, p_begin, size());
+        p_begin = nullptr;
+        p_end = nullptr;
+    }
+
+    // Copy constructor: allocator obtained via select_on_container_copy_construction.
+    template <class T, class A>
+    inline uvector<T, A>::uvector(const uvector& rhs)
+        : m_allocator(
+            std::allocator_traits<allocator_type>::select_on_container_copy_construction(rhs.get_allocator())
+        )
+        , p_begin(nullptr)
+        , p_end(nullptr)
+    {
+        init_data(rhs.p_begin, rhs.p_end)
+    }
+
+    // Extended copy constructor with an explicit allocator.
+    template <class T, class A>
+    inline uvector<T, A>::uvector(const uvector& rhs, const allocator_type& alloc)
+        : m_allocator(alloc)
+        , p_begin(nullptr)
+        , p_end(nullptr)
+    {
+        init_data(rhs.p_begin, rhs.p_end);
+    }
+
+    // Copy assignment.
+    // NOTE(review): this uses select_on_container_copy_construction rather than
+    // the propagate_on_container_copy_assignment trait — confirm intended.
+    template <class T, class A>
+    inline uvector<T, A>& uvector<T, A>::operator=(const uvector& rhs)
+    {
+        // No copy and swap idiom here due to performance issues
+        if (this != &rhs)
+        {
+            m_allocator = std::allocator_traits<allocator_type>::select_on_container_copy_construction(
+                rhs.get_allocator()
+            );
+            resize_impl(rhs.size());
+            // After resize_impl, storage for trivially default-constructible
+            // types is raw (copy-construct into it); otherwise elements already
+            // exist and are copy-assigned over.
+            if (xtrivially_default_constructible<value_type>::value)
+            {
+                std::uninitialized_copy(rhs.p_begin, rhs.p_end, p_begin);
+            }
+            else
+            {
+                std::copy(rhs.p_begin, rhs.p_end, p_begin);
+            }
+        }
+        return *this;
+    }
+
+    // Move constructor: steals the buffer and leaves rhs empty.
+    template <class T, class A>
+    inline uvector<T, A>::uvector(uvector&& rhs) noexcept
+        : m_allocator(std::move(rhs.m_allocator))
+        , p_begin(rhs.p_begin)
+        , p_end(rhs.p_end)
+    {
+        rhs.p_begin = nullptr;
+        rhs.p_end = nullptr;
+    }
+
+    // Extended move constructor.
+    // NOTE(review): the buffer is stolen even though `alloc` may differ from
+    // rhs's allocator; deallocating with a non-equal allocator is undefined —
+    // confirm callers only pass equal allocators.
+    template <class T, class A>
+    inline uvector<T, A>::uvector(uvector&& rhs, const allocator_type& alloc) noexcept
+        : m_allocator(alloc)
+        , p_begin(rhs.p_begin)
+        , p_end(rhs.p_end)
+    {
+        rhs.p_begin = nullptr;
+        rhs.p_end = nullptr;
+    }
+
+    // Move assignment via move-construct + pointer swap; the previous buffer
+    // is released when `tmp` is destroyed at scope exit.
+    // NOTE(review): m_allocator is neither assigned nor swapped here — confirm
+    // allocators are interchangeable for this container's use cases.
+    template <class T, class A>
+    inline uvector<T, A>& uvector<T, A>::operator=(uvector&& rhs) noexcept
+    {
+        using std::swap;
+        uvector tmp(std::move(rhs));
+        swap(p_begin, tmp.p_begin);
+        swap(p_end, tmp.p_end);
+        return *this;
+    }
+
+    // Returns a copy of the allocator.
+    template <class T, class A>
+    inline auto uvector<T, A>::get_allocator() const noexcept -> allocator_type
+    {
+        return allocator_type(m_allocator);
+    }
+
+    // True when the vector holds no elements.
+    template <class T, class A>
+    inline bool uvector<T, A>::empty() const noexcept
+    {
+        return size() == size_type(0);
+    }
+
+    // Number of elements, derived from the pointer pair.
+    template <class T, class A>
+    inline auto uvector<T, A>::size() const noexcept -> size_type
+    {
+        return static_cast<size_type>(p_end - p_begin);
+    }
+
+    // Resizes the vector; delegates to resize_impl (defined earlier in this file).
+    // NOTE(review): presumably reallocates on any size change since uvector keeps
+    // no spare capacity — confirm against resize_impl.
+    template <class T, class A>
+    inline void uvector<T, A>::resize(size_type size)
+    {
+        resize_impl(size);
+    }
+
+    // Maximum number of elements the allocator can provide.
+    // Queried through allocator_traits: the Allocator::max_size() member is
+    // deprecated in C++17 and removed in C++20, while the traits call works
+    // for every allocator.
+    template <class T, class A>
+    inline auto uvector<T, A>::max_size() const noexcept -> size_type
+    {
+        return std::allocator_traits<allocator_type>::max_size(m_allocator);
+    }
+
+    // uvector never keeps spare capacity, so reserve is a deliberate no-op...
+    template <class T, class A>
+    inline void uvector<T, A>::reserve(size_type /*new_cap*/)
+    {
+    }
+
+    // ...and capacity always equals size...
+    template <class T, class A>
+    inline auto uvector<T, A>::capacity() const noexcept -> size_type
+    {
+        return size();
+    }
+
+    // ...and shrink_to_fit has nothing to do.
+    template <class T, class A>
+    inline void uvector<T, A>::shrink_to_fit()
+    {
+    }
+
+    // Clearing is a resize to zero (releases the buffer via resize_impl).
+    template <class T, class A>
+    inline void uvector<T, A>::clear()
+    {
+        resize(size_type(0));
+    }
+
+    // Unchecked element access.
+    template <class T, class A>
+    inline auto uvector<T, A>::operator[](size_type i) -> reference
+    {
+        return p_begin[i];
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::operator[](size_type i) const -> const_reference
+    {
+        return p_begin[i];
+    }
+
+    // Bounds-checked element access; throws std::out_of_range on invalid index.
+    template <class T, class A>
+    inline auto uvector<T, A>::at(size_type i) -> reference
+    {
+        if (!(i < size()))
+        {
+            XTENSOR_THROW(std::out_of_range, "Out of range in uvector access");
+        }
+        return p_begin[i];
+    }
+
+    // Const overload of the bounds-checked access above.
+    template <class T, class A>
+    inline auto uvector<T, A>::at(size_type i) const -> const_reference
+    {
+        if (!(i < size()))
+        {
+            XTENSOR_THROW(std::out_of_range, "Out of range in uvector access");
+        }
+        return p_begin[i];
+    }
+
+    // First element (undefined behavior when empty, like std::vector).
+    template <class T, class A>
+    inline auto uvector<T, A>::front() -> reference
+    {
+        return p_begin[0];
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::front() const -> const_reference
+    {
+        return p_begin[0];
+    }
+
+    // Last element (undefined behavior when empty).
+    template <class T, class A>
+    inline auto uvector<T, A>::back() -> reference
+    {
+        return *(p_end - 1);
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::back() const -> const_reference
+    {
+        return *(p_end - 1);
+    }
+
+    // Raw pointer to the underlying contiguous storage.
+    template <class T, class A>
+    inline auto uvector<T, A>::data() noexcept -> pointer
+    {
+        return p_begin;
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::data() const noexcept -> const_pointer
+    {
+        return p_begin;
+    }
+
+    // Iterator interface: iterators are plain pointers into the buffer;
+    // reverse iterators wrap them with std::reverse_iterator.
+    template <class T, class A>
+    inline auto uvector<T, A>::begin() noexcept -> iterator
+    {
+        return p_begin;
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::end() noexcept -> iterator
+    {
+        return p_end;
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::begin() const noexcept -> const_iterator
+    {
+        return p_begin;
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::end() const noexcept -> const_iterator
+    {
+        return p_end;
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::cbegin() const noexcept -> const_iterator
+    {
+        return begin();
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::cend() const noexcept -> const_iterator
+    {
+        return end();
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::rbegin() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(end());
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::rend() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(begin());
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::rbegin() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(end());
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::rend() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(begin());
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::crbegin() const noexcept -> const_reverse_iterator
+    {
+        return rbegin();
+    }
+
+    template <class T, class A>
+    inline auto uvector<T, A>::crend() const noexcept -> const_reverse_iterator
+    {
+        return rend();
+    }
+
+    // Member swap: exchanges allocators and buffer pointers; O(1), no element moves.
+    template <class T, class A>
+    inline void uvector<T, A>::swap(uvector<T, A>& rhs) noexcept
+    {
+        using std::swap;
+        swap(m_allocator, rhs.m_allocator);
+        swap(p_begin, rhs.p_begin);
+        swap(p_end, rhs.p_end);
+    }
+
+    // Relational operators: element-wise equality and lexicographical ordering,
+    // with <=, > and >= derived from == and < in the usual way.
+    template <class T, class A>
+    inline bool operator==(const uvector<T, A>& lhs, const uvector<T, A>& rhs)
+    {
+        return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+    }
+
+    template <class T, class A>
+    inline bool operator!=(const uvector<T, A>& lhs, const uvector<T, A>& rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    template <class T, class A>
+    inline bool operator<(const uvector<T, A>& lhs, const uvector<T, A>& rhs)
+    {
+        return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+    }
+
+    template <class T, class A>
+    inline bool operator<=(const uvector<T, A>& lhs, const uvector<T, A>& rhs)
+    {
+        return !(lhs > rhs);
+    }
+
+    template <class T, class A>
+    inline bool operator>(const uvector<T, A>& lhs, const uvector<T, A>& rhs)
+    {
+        return rhs < lhs;
+    }
+
+    template <class T, class A>
+    inline bool operator>=(const uvector<T, A>& lhs, const uvector<T, A>& rhs)
+    {
+        return !(lhs < rhs);
+    }
+
+    // Free swap, enabling ADL-based `swap(a, b)`.
+    template <class T, class A>
+    inline void swap(uvector<T, A>& lhs, uvector<T, A>& rhs) noexcept
+    {
+        lhs.swap(rhs);
+    }
+
+    /**************************
+     * svector implementation *
+     **************************/
+
+    namespace detail
+    {
+        // Trait extracting the alignment requirement carried by an allocator:
+        // 0 for ordinary allocators (no special alignment)...
+        template <class T>
+        struct allocator_alignment
+        {
+            static constexpr std::size_t value = 0;
+        };
+
+        // ...and A for xt_simd::aligned_allocator<T, A>.
+        template <class T, std::size_t A>
+        struct allocator_alignment<xt_simd::aligned_allocator<T, A>>
+        {
+            static constexpr std::size_t value = A;
+        };
+    }
+
+    // Small-buffer-optimized vector: stores up to N elements inline on the
+    // stack and falls back to allocator-provided heap storage beyond that.
+    // Init controls whether new elements are value-initialized on resize.
+    template <class T, std::size_t N = 4, class A = std::allocator<T>, bool Init = true>
+    class svector
+    {
+    public:
+
+        using self_type = svector<T, N, A, Init>;
+        using allocator_type = A;
+        using size_type = typename std::allocator_traits<A>::size_type;
+        using value_type = typename std::allocator_traits<A>::value_type;
+        using pointer = typename std::allocator_traits<A>::pointer;
+        using const_pointer = typename std::allocator_traits<A>::const_pointer;
+        using reference = value_type&;
+        using const_reference = const value_type&;
+        using difference_type = typename std::allocator_traits<A>::difference_type;
+
+        using iterator = pointer;
+        using const_iterator = const_pointer;
+        using reverse_iterator = std::reverse_iterator<iterator>;
+        using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+        // Alignment of the inline buffer: taken from the allocator when it is
+        // an aligned_allocator, otherwise the natural alignment of T.
+        // (Old MSVC cannot evaluate the conditional expression here.)
+#if defined(_MSC_VER) && _MSC_VER < 1910
+        static constexpr std::size_t alignment = detail::allocator_alignment<A>::value;
+#else
+        static constexpr std::size_t alignment = detail::allocator_alignment<A>::value != 0
+                                                     ? detail::allocator_alignment<A>::value
+                                                     : alignof(T);
+#endif
+
+        svector() noexcept;
+        ~svector();
+
+        explicit svector(const allocator_type& alloc) noexcept;
+        explicit svector(size_type n, const allocator_type& alloc = allocator_type());
+        svector(size_type n, const value_type& v, const allocator_type& alloc = allocator_type());
+        svector(std::initializer_list<T> il, const allocator_type& alloc = allocator_type());
+
+        svector(const std::vector<T>& vec);
+
+        template <class IT, class = detail::require_input_iter<IT>>
+        svector(IT begin, IT end, const allocator_type& alloc = allocator_type());
+
+        // Cross-capacity conversion (only for a different inline size N2).
+        template <std::size_t N2, bool I2, class = std::enable_if_t<N != N2, void>>
+        explicit svector(const svector<T, N2, A, I2>& rhs);
+
+        svector& operator=(const svector& rhs);
+        svector& operator=(svector&& rhs) noexcept(std::is_nothrow_move_assignable<value_type>::value);
+        svector& operator=(const std::vector<T>& rhs);
+        svector& operator=(std::initializer_list<T> il);
+
+        template <std::size_t N2, bool I2, class = std::enable_if_t<N != N2, void>>
+        svector& operator=(const svector<T, N2, A, I2>& rhs);
+
+        svector(const svector& other);
+        svector(svector&& other) noexcept(std::is_nothrow_move_constructible<value_type>::value);
+
+        void assign(size_type n, const value_type& v);
+
+        template <class V>
+        void assign(std::initializer_list<V> il);
+
+        template <class IT>
+        void assign(IT other_begin, IT other_end);
+
+        reference operator[](size_type idx);
+        const_reference operator[](size_type idx) const;
+
+        reference at(size_type idx);
+        const_reference at(size_type idx) const;
+
+        pointer data();
+        const_pointer data() const;
+
+        void push_back(const T& elt);
+        void push_back(T&& elt);
+        void pop_back();
+
+        iterator begin();
+        const_iterator begin() const;
+        const_iterator cbegin() const;
+        iterator end();
+        const_iterator end() const;
+        const_iterator cend() const;
+
+        reverse_iterator rbegin();
+        const_reverse_iterator rbegin() const;
+        const_reverse_iterator crbegin() const;
+        reverse_iterator rend();
+        const_reverse_iterator rend() const;
+        const_reverse_iterator crend() const;
+
+        bool empty() const;
+        size_type size() const;
+        void resize(size_type n);
+        size_type max_size() const noexcept;
+        size_type capacity() const;
+        void reserve(size_type n);
+        void shrink_to_fit();
+        void clear();
+
+        reference front();
+        const_reference front() const;
+        reference back();
+        const_reference back() const;
+
+        // True while the elements still live in the inline buffer m_data.
+        bool on_stack();
+
+        iterator erase(const_iterator cit);
+        iterator erase(const_iterator cfirst, const_iterator clast);
+
+        iterator insert(const_iterator it, const T& elt);
+
+        template <class It>
+        iterator insert(const_iterator pos, It first, It last);
+
+        iterator insert(const_iterator pos, std::initializer_list<T> l);
+
+        template <std::size_t ON, class OA, bool InitA>
+        void swap(svector<T, ON, OA, InitA>& rhs);
+
+        allocator_type get_allocator() const noexcept;
+
+    private:
+
+        A m_allocator;
+
+        // Pointer triplet over the active storage (inline or heap). The default
+        // member initializers only take the address of m_data, so declaring the
+        // pointers before the array is well-defined.
+        T* m_begin = std::begin(m_data);
+        T* m_end = std::begin(m_data);
+        T* m_capacity = std::end(m_data);
+
+        // stack allocated memory
+        alignas(alignment) T m_data[N > 0 ? N : 1];
+
+        void grow(size_type min_capacity = 0);
+        void destroy_range(T* begin, T* end);
+    };
+
+    // Destructor: releases the heap buffer (destroying elements as needed);
+    // the inline buffer needs no action here.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>::~svector()
+    {
+        if (!on_stack())
+        {
+            detail::safe_destroy_deallocate(m_allocator, m_begin, static_cast<std::size_t>(m_capacity - m_begin));
+        }
+    }
+
+    // Default constructor delegates to the allocator constructor.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>::svector() noexcept
+        : svector(allocator_type())
+    {
+    }
+
+    // Empty vector using the inline buffer with the given allocator.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>::svector(const allocator_type& alloc) noexcept
+        : m_allocator(alloc)
+    {
+    }
+
+    // Sized constructor: when Init is set the n elements are filled with T(0),
+    // otherwise they are left as resize() produces them.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>::svector(size_type n, const allocator_type& alloc)
+        : m_allocator(alloc)
+    {
+        if (Init)
+        {
+            assign(n, T(0));
+        }
+        else
+        {
+            resize(n);
+        }
+    }
+
+    // Iterator-range constructor.
+    template <class T, std::size_t N, class A, bool Init>
+    template <class IT, class>
+    inline svector<T, N, A, Init>::svector(IT begin, IT end, const allocator_type& alloc)
+        : m_allocator(alloc)
+    {
+        assign(begin, end);
+    }
+
+    // Conversion from an svector with a different inline capacity.
+    template <class T, std::size_t N, class A, bool Init>
+    template <std::size_t N2, bool I2, class>
+    inline svector<T, N, A, Init>::svector(const svector<T, N2, A, I2>& rhs)
+        : m_allocator(rhs.get_allocator())
+    {
+        assign(rhs.begin(), rhs.end());
+    }
+
+    // Conversion from std::vector (default-constructed allocator).
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>::svector(const std::vector<T>& vec)
+    {
+        assign(vec.begin(), vec.end());
+    }
+
+    // n copies of v.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>::svector(size_type n, const value_type& v, const allocator_type& alloc)
+        : m_allocator(alloc)
+    {
+        assign(n, v);
+    }
+
+    // Initializer-list constructor.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>::svector(std::initializer_list<T> il, const allocator_type& alloc)
+        : m_allocator(alloc)
+    {
+        assign(il.begin(), il.end());
+    }
+
+    // Copy assignment: element-wise copy into existing/grown storage.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>& svector<T, N, A, Init>::operator=(const svector& rhs)
+    {
+        assign(rhs.begin(), rhs.end());
+        return *this;
+    }
+
+    // Move assignment. NOTE(review): this copies rhs' elements rather than
+    // stealing its heap buffer; functionally correct but not a true move —
+    // confirm whether this is intentional (e.g. to keep the inline buffer).
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>& svector<T, N, A, Init>::operator=(svector&& rhs
+    ) noexcept(std::is_nothrow_move_assignable<value_type>::value)
+    {
+        assign(rhs.begin(), rhs.end());
+        return *this;
+    }
+
+    // Assignment from std::vector, adopting its (copy-propagated) allocator.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>& svector<T, N, A, Init>::operator=(const std::vector<T>& rhs)
+    {
+        m_allocator = std::allocator_traits<allocator_type>::select_on_container_copy_construction(
+            rhs.get_allocator()
+        );
+        assign(rhs.begin(), rhs.end());
+        return *this;
+    }
+
+    // Assignment from an initializer list via a temporary svector.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>& svector<T, N, A, Init>::operator=(std::initializer_list<T> il)
+    {
+        return operator=(self_type(il));
+    }
+
+    // Assignment from an svector with a different inline capacity.
+    template <class T, std::size_t N, class A, bool Init>
+    template <std::size_t N2, bool I2, class>
+    inline svector<T, N, A, Init>& svector<T, N, A, Init>::operator=(const svector<T, N2, A, I2>& rhs)
+    {
+        m_allocator = std::allocator_traits<allocator_type>::select_on_container_copy_construction(
+            rhs.get_allocator()
+        );
+        assign(rhs.begin(), rhs.end());
+        return *this;
+    }
+
+    // Copy constructor with standard allocator propagation.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>::svector(const svector& rhs)
+        : m_allocator(
+            std::allocator_traits<allocator_type>::select_on_container_copy_construction(rhs.get_allocator())
+        )
+    {
+        assign(rhs.begin(), rhs.end());
+    }
+
+    // Move constructor: swap with the (empty) default-initialized *this,
+    // leaving rhs holding the previous (empty) state.
+    template <class T, std::size_t N, class A, bool Init>
+    inline svector<T, N, A, Init>::svector(svector&& rhs
+    ) noexcept(std::is_nothrow_move_constructible<value_type>::value)
+    {
+        this->swap(rhs);
+    }
+
+    // Assigns n copies of v, growing the buffer if needed.
+    template <class T, std::size_t N, class A, bool Init>
+    inline void svector<T, N, A, Init>::assign(size_type n, const value_type& v)
+    {
+        if (n > N && n > capacity())
+        {
+            grow(n);
+        }
+        m_end = m_begin + n;
+        std::fill(begin(), end(), v);
+    }
+
+    // Assigns from an initializer list (possibly of a convertible type V).
+    template <class T, std::size_t N, class A, bool Init>
+    template <class V>
+    inline void svector<T, N, A, Init>::assign(std::initializer_list<V> il)
+    {
+        assign(il.begin(), il.end());
+    }
+
+    // Assigns from a random-access iterator range.
+    // NOTE(review): uninitialized_copy is used even when the destination slots
+    // may already hold constructed elements — fine for the trivially copyable
+    // types this container targets; verify for non-trivial T.
+    template <class T, std::size_t N, class A, bool Init>
+    template <class IT>
+    inline void svector<T, N, A, Init>::assign(IT other_begin, IT other_end)
+    {
+        std::size_t size = static_cast<std::size_t>(other_end - other_begin);
+        if (size > N && size > capacity())
+        {
+            grow(size);
+        }
+        std::uninitialized_copy(other_begin, other_end, m_begin);
+        m_end = m_begin + size;
+    }
+
+    // Unchecked element access.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::operator[](size_type idx) -> reference
+    {
+        return m_begin[idx];
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::operator[](size_type idx) const -> const_reference
+    {
+        return m_begin[idx];
+    }
+
+    // Bounds-checked element access; throws std::out_of_range on invalid index.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::at(size_type idx) -> reference
+    {
+        if (!(idx < size()))
+        {
+            XTENSOR_THROW(std::out_of_range, "Out of range in svector access");
+        }
+        return m_begin[idx];
+    }
+
+    // Const overload of the bounds-checked access above.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::at(size_type idx) const -> const_reference
+    {
+        if (!(idx < size()))
+        {
+            XTENSOR_THROW(std::out_of_range, "Out of range in svector access");
+        }
+        return m_begin[idx];
+    }
+
+    // Raw pointer to the active storage (inline buffer or heap).
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::data() -> pointer
+    {
+        return m_begin;
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::data() const -> const_pointer
+    {
+        return m_begin;
+    }
+
+    // Resizes to n elements, growing storage if needed. When Init is set,
+    // newly exposed elements are value-initialized; shrinking just moves m_end
+    // (elements are not destroyed here).
+    template <class T, std::size_t N, class A, bool Init>
+    void svector<T, N, A, Init>::resize(size_type n)
+    {
+        if (n > N && n > capacity())
+        {
+            grow(n);
+        }
+        size_type old_size = size();
+        m_end = m_begin + n;
+        if (Init && old_size < size())
+        {
+            std::fill(begin() + old_size, end(), T());
+        }
+    }
+
+    // Maximum number of elements the allocator can provide.
+    // Queried through allocator_traits: the Allocator::max_size() member is
+    // deprecated in C++17 and removed in C++20, while the traits call works
+    // for every allocator.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::max_size() const noexcept -> size_type
+    {
+        return std::allocator_traits<allocator_type>::max_size(m_allocator);
+    }
+
+    // Current capacity of the active buffer (inline or heap).
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::capacity() const -> size_type
+    {
+        return static_cast<std::size_t>(m_capacity - m_begin);
+    }
+
+    // Grows the buffer to hold at least n elements; never shrinks.
+    template <class T, std::size_t N, class A, bool Init>
+    inline void svector<T, N, A, Init>::reserve(size_type n)
+    {
+        if (n > N && n > capacity())
+        {
+            grow(n);
+        }
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline void svector<T, N, A, Init>::shrink_to_fit()
+    {
+        // No op for now
+    }
+
+    // Clearing is a resize to zero; capacity is retained.
+    template <class T, std::size_t N, class A, bool Init>
+    inline void svector<T, N, A, Init>::clear()
+    {
+        resize(size_type(0));
+    }
+
+    // Appends a copy of elt, growing the buffer when full.
+    // NOTE(review): assigns into the one-past-end slot instead of constructing —
+    // valid for the trivial types this container targets; verify for non-trivial T.
+    template <class T, std::size_t N, class A, bool Init>
+    void svector<T, N, A, Init>::push_back(const T& elt)
+    {
+        if (m_end >= m_capacity)
+        {
+            grow();
+        }
+        *(m_end++) = elt;
+    }
+
+    // Move-append overload.
+    template <class T, std::size_t N, class A, bool Init>
+    void svector<T, N, A, Init>::push_back(T&& elt)
+    {
+        if (m_end >= m_capacity)
+        {
+            grow();
+        }
+        *(m_end++) = std::move(elt);
+    }
+
+    // Drops the last element (not destroyed; undefined when empty).
+    template <class T, std::size_t N, class A, bool Init>
+    void svector<T, N, A, Init>::pop_back()
+    {
+        --m_end;
+    }
+
+    // Iterator interface: iterators are plain pointers into the active buffer;
+    // reverse iterators wrap them with std::reverse_iterator.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::begin() -> iterator
+    {
+        return m_begin;
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::begin() const -> const_iterator
+    {
+        return m_begin;
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::cbegin() const -> const_iterator
+    {
+        return m_begin;
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::end() -> iterator
+    {
+        return m_end;
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::end() const -> const_iterator
+    {
+        return m_end;
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::cend() const -> const_iterator
+    {
+        return m_end;
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::rbegin() -> reverse_iterator
+    {
+        return reverse_iterator(m_end);
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::rbegin() const -> const_reverse_iterator
+    {
+        return const_reverse_iterator(m_end);
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::crbegin() const -> const_reverse_iterator
+    {
+        return const_reverse_iterator(m_end);
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::rend() -> reverse_iterator
+    {
+        return reverse_iterator(m_begin);
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::rend() const -> const_reverse_iterator
+    {
+        return const_reverse_iterator(m_begin);
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::crend() const -> const_reverse_iterator
+    {
+        return const_reverse_iterator(m_begin);
+    }
+
+    // Number of elements, derived from the pointer pair.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::size() const -> size_type
+    {
+        return static_cast<size_type>(m_end - m_begin);
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::empty() const -> bool
+    {
+        return m_begin == m_end;
+    }
+
+    // First element; asserts non-emptiness in debug builds.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::front() -> reference
+    {
+        XTENSOR_ASSERT(!empty());
+        return m_begin[0];
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::front() const -> const_reference
+    {
+        XTENSOR_ASSERT(!empty());
+        return m_begin[0];
+    }
+
+    // Last element; asserts non-emptiness in debug builds.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::back() -> reference
+    {
+        XTENSOR_ASSERT(!empty());
+        return m_end[-1];
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::back() const -> const_reference
+    {
+        XTENSOR_ASSERT(!empty());
+        return m_end[-1];
+    }
+
+    // True while the elements still live in the inline buffer.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::on_stack() -> bool
+    {
+        return m_begin == &m_data[0];
+    }
+
+    // Returns a copy of the allocator.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::get_allocator() const noexcept -> allocator_type
+    {
+        return m_allocator;
+    }
+
+    // Removes the element at cit by shifting the tail left by one slot.
+    // std::move returns the new logical end, so the end pointer update and the
+    // shift are folded into a single statement.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::erase(const_iterator cit) -> iterator
+    {
+        iterator pos = const_cast<pointer>(cit);
+        m_end = std::move(pos + 1, m_end, pos);
+        return pos;
+    }
+
+    // Removes [cfirst, clast) by shifting the remaining tail down onto cfirst.
+    // When clast == end the move is a no-op and the call simply truncates the
+    // vector at cfirst, so no separate branch is needed.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::erase(const_iterator cfirst, const_iterator clast) -> iterator
+    {
+        iterator first = const_cast<pointer>(cfirst);
+        iterator last = const_cast<pointer>(clast);
+        m_end = std::move(last, m_end, first);
+        return first;
+    }
+
+    // Inserts a copy of elt before cit. The element reference may alias the
+    // vector's own storage, so the code carefully re-reads from the shifted
+    // position when elt was moved by the tail shift.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::insert(const_iterator cit, const T& elt) -> iterator
+    {
+        auto it = const_cast<pointer>(cit);
+        // Fast path: appending at the end.
+        if (it == m_end)
+        {
+            push_back(elt);
+            return m_end - 1;
+        }
+
+        // Grow invalidates pointers, so re-derive the insertion point.
+        if (m_end >= m_capacity)
+        {
+            std::ptrdiff_t elt_no = it - m_begin;
+            grow();
+            it = m_begin + elt_no;
+        }
+
+        // Shift [it, end) up by one: copy the last element into the new slot,
+        // then move the rest backward.
+        (*m_end) = back();
+        std::move_backward(it, m_end - 1, m_end);
+        ++m_end;
+
+        // Update ref if element moved
+        const T* elt_ptr = &elt;
+        bool cond = it <= elt_ptr && elt_ptr < m_end;
+        // More complicated than incrementing elt_ptr, but this avoids
+        // false positive array-bounds warning on GCC 10
+        const T* src_ptr = cond ? it + (elt_ptr - it) + std::ptrdiff_t(1) : elt_ptr;
+        *it = *src_ptr;
+        return it;
+    }
+
+    // Inserts the range [first, last) before pos.
+    // NOTE(review): move_backward may write into the uninitialized area past
+    // the old end — fine for the trivial types this container targets; verify
+    // for non-trivial T.
+    template <class T, std::size_t N, class A, bool Init>
+    template <class It>
+    inline auto svector<T, N, A, Init>::insert(const_iterator pos, It first, It last) -> iterator
+    {
+        auto it = const_cast<pointer>(pos);
+        difference_type n = std::distance(first, last);
+        if (n > 0)
+        {
+            // Grow invalidates pointers, so re-derive the insertion point.
+            if (n > m_capacity - m_end)
+            {
+                std::ptrdiff_t elt_no = it - m_begin;
+                grow(static_cast<size_t>((m_capacity - m_begin) + n));
+                it = m_begin + elt_no;
+            }
+
+            // Shift the tail up by n slots, then copy the new elements in.
+            std::move_backward(it, m_end, m_end + n);
+            m_end += n;
+            std::copy(first, last, it);
+        }
+        return it;
+    }
+
+    // Initializer-list overload delegating to the range insert.
+    template <class T, std::size_t N, class A, bool Init>
+    inline auto svector<T, N, A, Init>::insert(const_iterator pos, std::initializer_list<T> l) -> iterator
+    {
+        return insert(pos, l.begin(), l.end());
+    }
+
+    // Destroys the elements of [begin, end) in reverse order. Trivially
+    // default-constructible types are skipped entirely (no destructor work).
+    template <class T, std::size_t N, class A, bool Init>
+    inline void svector<T, N, A, Init>::destroy_range(T* begin, T* end)
+    {
+        if (xtrivially_default_constructible<T>::value)
+        {
+            return;
+        }
+        for (T* p = end; p != begin;)
+        {
+            (--p)->~T();
+        }
+    }
+
+    // Swaps contents with another svector (possibly of a different inline
+    // capacity). Heap-heap swaps exchange pointers in O(1); if either side
+    // uses its inline buffer, elements are swapped/copied individually.
+    template <class T, std::size_t N, class A, bool Init>
+    template <std::size_t ON, class OA, bool InitA>
+    inline void svector<T, N, A, Init>::swap(svector<T, ON, OA, InitA>& rhs)
+    {
+        using std::swap;
+        if (this == &rhs)
+        {
+            return;
+        }
+
+        // We can only avoid copying elements if neither vector is small.
+        if (!this->on_stack() && !rhs.on_stack())
+        {
+            swap(this->m_begin, rhs.m_begin);
+            swap(this->m_end, rhs.m_end);
+            swap(this->m_capacity, rhs.m_capacity);
+            return;
+        }
+
+        size_type rhs_old_size = rhs.size();
+        size_type old_size = this->size();
+
+        // Equalize sizes so the element-wise swap below covers the overlap.
+        if (rhs_old_size > old_size)
+        {
+            this->resize(rhs_old_size);
+        }
+        else if (old_size > rhs_old_size)
+        {
+            rhs.resize(old_size);
+        }
+
+        // Swap the shared elements.
+        size_type min_size = (std::min)(old_size, rhs_old_size);
+        for (size_type i = 0; i < min_size; ++i)
+        {
+            swap((*this)[i], rhs[i]);
+        }
+
+        // Copy over the extra elts.
+        if (old_size > rhs_old_size)
+        {
+            std::copy(this->begin() + min_size, this->end(), rhs.begin() + min_size);
+            this->destroy_range(this->begin() + min_size, this->end());
+            this->m_end = this->begin() + min_size;
+        }
+        else if (rhs_old_size > old_size)
+        {
+            std::copy(rhs.begin() + min_size, rhs.end(), this->begin() + min_size);
+            this->destroy_range(rhs.begin() + min_size, rhs.end());
+            rhs.m_end = rhs.begin() + min_size;
+        }
+    }
+
+    // Reallocates the buffer to at least min_capacity elements (at least
+    // doubling the current size so repeated push_back stays amortized O(1)),
+    // copies the existing elements over, and releases the old heap buffer.
+    // The two original branches performed the identical allocate+copy; only
+    // the deallocation differs, since the inline stack buffer must never be
+    // handed to the allocator.
+    template <class T, std::size_t N, class A, bool Init>
+    inline void svector<T, N, A, Init>::grow(size_type min_capacity)
+    {
+        size_type current_size = size();
+        size_type new_capacity = 2 * current_size + 1;  // Always grow.
+        if (new_capacity < min_capacity)
+        {
+            new_capacity = min_capacity;
+        }
+
+        // Allocate and copy before releasing the old storage, so a throwing
+        // allocation leaves the container untouched.
+        T* new_alloc = m_allocator.allocate(new_capacity);
+        std::uninitialized_copy(m_begin, m_end, new_alloc);
+        if (m_begin != &m_data[0])
+        {
+            // Previous buffer was heap-allocated: return it to the allocator.
+            m_allocator.deallocate(m_begin, std::size_t(m_capacity - m_begin));
+        }
+        XTENSOR_ASSERT(new_alloc);
+
+        m_end = new_alloc + current_size;
+        m_begin = new_alloc;
+        m_capacity = new_alloc + new_capacity;
+    }
+
+    // Relational operators, including mixed comparisons with std::vector:
+    // element-wise equality and lexicographical ordering, with <=, > and >=
+    // derived from == and < in the usual way.
+    template <class T, std::size_t N, class A, bool Init>
+    inline bool operator==(const std::vector<T>& lhs, const svector<T, N, A, Init>& rhs)
+    {
+        return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline bool operator==(const svector<T, N, A, Init>& lhs, const std::vector<T>& rhs)
+    {
+        return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline bool operator==(const svector<T, N, A, Init>& lhs, const svector<T, N, A, Init>& rhs)
+    {
+        return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline bool operator!=(const svector<T, N, A, Init>& lhs, const svector<T, N, A, Init>& rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline bool operator<(const svector<T, N, A, Init>& lhs, const svector<T, N, A, Init>& rhs)
+    {
+        return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline bool operator<=(const svector<T, N, A, Init>& lhs, const svector<T, N, A, Init>& rhs)
+    {
+        return !(lhs > rhs);
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline bool operator>(const svector<T, N, A, Init>& lhs, const svector<T, N, A, Init>& rhs)
+    {
+        return rhs < lhs;
+    }
+
+    template <class T, std::size_t N, class A, bool Init>
+    inline bool operator>=(const svector<T, N, A, Init>& lhs, const svector<T, N, A, Init>& rhs)
+    {
+        return !(lhs < rhs);
+    }
+
+    // Free swap, enabling ADL-based `swap(a, b)`.
+    template <class T, std::size_t N, class A, bool Init>
+    inline void swap(svector<T, N, A, Init>& lhs, svector<T, N, A, Init>& rhs) noexcept
+    {
+        lhs.swap(rhs);
+    }
+
+    // Rebinds an svector to a new element type X, rebinding the allocator
+    // through allocator_traits while keeping the inline capacity and Init flag.
+    template <class X, class T, std::size_t N, class A, bool B>
+    struct rebind_container<X, svector<T, N, A, B>>
+    {
+        using traits = std::allocator_traits<A>;
+        using allocator = typename traits::template rebind_alloc<X>;
+        using type = svector<X, N, allocator, B>;
+    };
+
+    /**
+     * This array class is modeled after ``std::array`` but adds optional alignment through a template
+     * parameter.
+     *
+     * To be moved to xtl, along with the rest of xstorage.hpp
+     */
+    template <class T, std::size_t N, std::size_t Align = XTENSOR_SELECT_ALIGN(T)>
+    class alignas(Align) aligned_array : public std::array<T, N>
+    {
+    public:
+
+        // Note: this is for alignment detection. The allocator serves no other purpose than
+        //       that of a trait here.
+        using allocator_type = std::conditional_t<Align != 0, xt_simd::aligned_allocator<T, Align>, std::allocator<T>>;
+    };
+
+// const_array (below) stores its data in a XTENSOR_CONST member: const on most
+// compilers, but left mutable on MSVC where the macro expands to nothing.
+#if defined(_MSC_VER)
+#define XTENSOR_CONST
+#else
+#define XTENSOR_CONST const
+#endif
+
+// GCC < 5 workaround: constexpr element/pointer access to the member array is
+// routed through array_traits instead of touching m_data directly.
+#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
+#define GCC4_FALLBACK
+
+    namespace const_array_detail
+    {
+        template <class T, std::size_t N>
+        struct array_traits
+        {
+            using storage_type = T[N];
+
+            static constexpr T& ref(const storage_type& t, std::size_t n) noexcept
+            {
+                return const_cast<T&>(t[n]);
+            }
+
+            static constexpr T* ptr(const storage_type& t) noexcept
+            {
+                return const_cast<T*>(t);
+            }
+        };
+
+        // Zero-size specialization: storage collapses to an empty struct so that
+        // const_array<T, 0> remains a valid type.
+        template <class T>
+        struct array_traits<T, 0>
+        {
+            struct empty
+            {
+            };
+
+            using storage_type = empty;
+
+            // NOTE(review): this dereferences a null pointer, which is UB if the
+            // function is ever actually called; presumably it is unreachable
+            // because a zero-size array has no valid index — confirm upstream.
+            static constexpr T& ref(const storage_type& /*t*/, std::size_t /*n*/) noexcept
+            {
+                return *static_cast<T*>(nullptr);
+            }
+
+            static constexpr T* ptr(const storage_type& /*t*/) noexcept
+            {
+                return nullptr;
+            }
+        };
+    }
+#endif
+
+    /**
+     * A std::array like class with all member function (except reverse iterators)
+     * as constexpr. The data is immutable once set.
+     */
+    template <class T, std::size_t N>
+    struct const_array
+    {
+        using size_type = std::size_t;
+        using value_type = T;
+        using pointer = value_type*;
+        using const_pointer = const value_type*;
+        using reference = value_type&;
+        using const_reference = const value_type&;
+        using difference_type = std::ptrdiff_t;
+        using iterator = pointer;
+        using const_iterator = const_pointer;
+
+        using reverse_iterator = std::reverse_iterator<const_iterator>;
+        using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+        constexpr const_reference operator[](std::size_t idx) const
+        {
+#ifdef GCC4_FALLBACK
+            return const_array_detail::array_traits<T, N>::ref(m_data, idx);
+#else
+            return m_data[idx];
+#endif
+        }
+
+        constexpr const_iterator begin() const noexcept
+        {
+            return cbegin();
+        }
+
+        constexpr const_iterator end() const noexcept
+        {
+            return cend();
+        }
+
+        constexpr const_iterator cbegin() const noexcept
+        {
+            return data();
+        }
+
+        constexpr const_iterator cend() const noexcept
+        {
+            return data() + N;
+        }
+
+        // TODO make constexpr once C++17 arrives
+        reverse_iterator rbegin() const noexcept
+        {
+            return crbegin();
+        }
+
+        reverse_iterator rend() const noexcept
+        {
+            return crend();
+        }
+
+        const_reverse_iterator crbegin() const noexcept
+        {
+            return const_reverse_iterator(end());
+        }
+
+        const_reverse_iterator crend() const noexcept
+        {
+            return const_reverse_iterator(begin());
+        }
+
+        constexpr const_pointer data() const noexcept
+        {
+#ifdef GCC4_FALLBACK
+            return const_array_detail::array_traits<T, N>::ptr(m_data);
+#else
+            return m_data;
+#endif
+        }
+
+        constexpr const_reference front() const noexcept
+        {
+#ifdef GCC4_FALLBACK
+            return const_array_detail::array_traits<T, N>::ref(m_data, 0);
+#else
+            return m_data[0];
+#endif
+        }
+
+        constexpr const_reference back() const noexcept
+        {
+#ifdef GCC4_FALLBACK
+            return N ? const_array_detail::array_traits<T, N>::ref(m_data, N - 1)
+                     : const_array_detail::array_traits<T, N>::ref(m_data, 0);
+#else
+            return m_data[size() - 1];
+#endif
+        }
+
+        constexpr bool empty() const noexcept
+        {
+            return size() == size_type(0);
+        }
+
+        constexpr size_type size() const noexcept
+        {
+            return N;
+        }
+
+#ifdef GCC4_FALLBACK
+        XTENSOR_CONST typename const_array_detail::array_traits<T, N>::storage_type m_data;
+#else
+        XTENSOR_CONST T m_data[N > 0 ? N : 1];
+#endif
+    };
+
+#undef GCC4_FALLBACK
+
+    // Relational operators for const_array. Unlike the svector overloads above,
+    // operator== needs no size check: both operands have the static size N.
+    template <class T, std::size_t N>
+    inline bool operator==(const const_array<T, N>& lhs, const const_array<T, N>& rhs)
+    {
+        return std::equal(lhs.cbegin(), lhs.cend(), rhs.cbegin());
+    }
+
+    template <class T, std::size_t N>
+    inline bool operator!=(const const_array<T, N>& lhs, const const_array<T, N>& rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    // Ordering follows std::array: element-wise lexicographical comparison, with
+    // <=, > and >= all derived from operator<.
+    template <class T, std::size_t N>
+    inline bool operator<(const const_array<T, N>& lhs, const const_array<T, N>& rhs)
+    {
+        return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+    }
+
+    template <class T, std::size_t N>
+    inline bool operator<=(const const_array<T, N>& lhs, const const_array<T, N>& rhs)
+    {
+        return !(lhs > rhs);
+    }
+
+    template <class T, std::size_t N>
+    inline bool operator>(const const_array<T, N>& lhs, const const_array<T, N>& rhs)
+    {
+        return rhs < lhs;
+    }
+
+    template <class T, std::size_t N>
+    inline bool operator>=(const const_array<T, N>& lhs, const const_array<T, N>& rhs)
+    {
+        return !(lhs < rhs);
+    }
+
+// Workaround for rebind_container problems on GCC 8 with C++17 enabled:
+// explicit rebind specializations for the two fixed-size containers above,
+// swapping only the value type and keeping the static size N.
+#if defined(__GNUC__) && __GNUC__ > 6 && !defined(__clang__) && __cplusplus >= 201703L
+    template <class X, class T, std::size_t N>
+    struct rebind_container<X, aligned_array<T, N>>
+    {
+        using type = aligned_array<X, N>;
+    };
+
+    template <class X, class T, std::size_t N>
+    struct rebind_container<X, const_array<T, N>>
+    {
+        using type = const_array<X, N>;
+    };
+#endif
+
+    /**
+     * @class fixed_shape
+     * Fixed shape implementation for compile time defined arrays.
+     * @sa xshape
+     */
+    template <std::size_t... X>
+    class fixed_shape
+    {
+    public:
+
+#if defined(_MSC_VER)
+        using cast_type = std::array<std::size_t, sizeof...(X)>;
+#define XTENSOR_FIXED_SHAPE_CONSTEXPR inline
+#else
+        using cast_type = const_array<std::size_t, sizeof...(X)>;
+#define XTENSOR_FIXED_SHAPE_CONSTEXPR constexpr
+#endif
+        using value_type = std::size_t;
+        using size_type = std::size_t;
+        using const_iterator = typename cast_type::const_iterator;
+
+        static constexpr std::size_t size()
+        {
+            return sizeof...(X);
+        }
+
+        template <std::size_t idx>
+        static constexpr auto get()
+        {
+            using tmp_cast_type = std::array<std::size_t, sizeof...(X)>;
+            return std::get<idx>(tmp_cast_type{X...});
+        }
+
+        XTENSOR_FIXED_SHAPE_CONSTEXPR operator cast_type() const
+        {
+            return cast_type({X...});
+        }
+
+        XTENSOR_FIXED_SHAPE_CONSTEXPR auto begin() const
+        {
+            return m_array.begin();
+        }
+
+        XTENSOR_FIXED_SHAPE_CONSTEXPR auto end() const
+        {
+            return m_array.end();
+        }
+
+        auto rbegin() const
+        {
+            return m_array.rbegin();
+        }
+
+        auto rend() const
+        {
+            return m_array.rend();
+        }
+
+        XTENSOR_FIXED_SHAPE_CONSTEXPR auto cbegin() const
+        {
+            return m_array.cbegin();
+        }
+
+        XTENSOR_FIXED_SHAPE_CONSTEXPR auto cend() const
+        {
+            return m_array.cend();
+        }
+
+        XTENSOR_FIXED_SHAPE_CONSTEXPR std::size_t operator[](std::size_t idx) const
+        {
+            return m_array[idx];
+        }
+
+        XTENSOR_FIXED_SHAPE_CONSTEXPR bool empty() const
+        {
+            return sizeof...(X) == 0;
+        }
+
+    private:
+
+        XTENSOR_CONSTEXPR_ENHANCED_STATIC cast_type m_array = cast_type({X...});
+    };
+
+// Out-of-class definition of the static constexpr member, required before
+// C++17 inline variables when the enhanced-constexpr path is taken.
+#ifdef XTENSOR_HAS_CONSTEXPR_ENHANCED
+    template <std::size_t... X>
+    constexpr typename fixed_shape<X...>::cast_type fixed_shape<X...>::m_array;
+#endif
+
+#undef XTENSOR_FIXED_SHAPE_CONSTEXPR
+
+    /**
+     * Non-owning, read-only view over the sub-range [Start, End) of a sequence E.
+     * End == -1 means "up to the end of the underlying sequence" (see size()).
+     * The viewed sequence is held by const reference and must outlive the view.
+     */
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End = -1>
+    class sequence_view
+    {
+    public:
+
+        using value_type = typename E::value_type;
+        using reference = typename E::reference;
+        using const_reference = typename E::const_reference;
+        using pointer = typename E::pointer;
+        using const_pointer = typename E::const_pointer;
+
+        using size_type = typename E::size_type;
+        using difference_type = typename E::difference_type;
+
+        using iterator = typename E::iterator;
+        using const_iterator = typename E::const_iterator;
+        using reverse_iterator = typename E::reverse_iterator;
+        using const_reverse_iterator = typename E::const_reverse_iterator;
+
+        explicit sequence_view(const E& container);
+
+        // Rebinds a view with different static bounds onto the same storage.
+        template <std::ptrdiff_t OS, std::ptrdiff_t OE>
+        explicit sequence_view(const sequence_view<E, OS, OE>& other);
+
+        // Conversion to any container T with a begin(); copies the viewed range.
+        template <class T, class R = decltype(std::declval<T>().begin())>
+        operator T() const;
+
+        bool empty() const;
+        size_type size() const;
+        const_reference operator[](std::size_t idx) const;
+
+        const_iterator end() const;
+        const_iterator begin() const;
+        const_iterator cend() const;
+        const_iterator cbegin() const;
+
+        const_reverse_iterator rend() const;
+        const_reverse_iterator rbegin() const;
+        const_reverse_iterator crend() const;
+        const_reverse_iterator crbegin() const;
+
+        const_reference front() const;
+        const_reference back() const;
+
+        // Access to the underlying (whole) sequence, ignoring Start/End.
+        const E& storage() const;
+
+    private:
+
+        const E& m_sequence;
+    };
+
+    // Binds the view to an existing container; no copy is made.
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    sequence_view<E, Start, End>::sequence_view(const E& container)
+        : m_sequence(container)
+    {
+    }
+
+    // Re-views the other view's underlying storage with this view's bounds.
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    template <std::ptrdiff_t OS, std::ptrdiff_t OE>
+    sequence_view<E, Start, End>::sequence_view(const sequence_view<E, OS, OE>& other)
+        : m_sequence(other.storage())
+    {
+    }
+
+    // Materializes the viewed range into a new container of type T, using
+    // xtl::make_sequence to size it and std::copy to fill it.
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    template <class T, class R>
+    sequence_view<E, Start, End>::operator T() const
+    {
+        T ret = xtl::make_sequence<T>(this->size());
+        std::copy(this->cbegin(), this->cend(), ret.begin());
+        return ret;
+    }
+
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    bool sequence_view<E, Start, End>::empty() const
+    {
+        return size() == size_type(0);
+    }
+
+    // Number of viewed elements. End == -1 encodes "to the end of the
+    // underlying sequence", so the size is computed at runtime in that case.
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::size() const -> size_type
+    {
+        if (End == -1)
+        {
+            return m_sequence.size() - static_cast<size_type>(Start);
+        }
+        else
+        {
+            return static_cast<size_type>(End - Start);
+        }
+    }
+
+    // Unchecked access, offset by the view's Start into the full sequence.
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::operator[](std::size_t idx) const -> const_reference
+    {
+        return m_sequence[idx + static_cast<std::size_t>(Start)];
+    }
+
+    // Iterators into the underlying sequence, clipped to [Start, End); an End
+    // of -1 falls through to the underlying end().
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::end() const -> const_iterator
+    {
+        if (End != -1)
+        {
+            return m_sequence.begin() + End;
+        }
+        else
+        {
+            return m_sequence.end();
+        }
+    }
+
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::begin() const -> const_iterator
+    {
+        return m_sequence.begin() + Start;
+    }
+
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::cend() const -> const_iterator
+    {
+        return end();
+    }
+
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::cbegin() const -> const_iterator
+    {
+        return begin();
+    }
+
+    // Reverse iterators are the usual adaptors over begin()/end().
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::rend() const -> const_reverse_iterator
+    {
+        return const_reverse_iterator(begin());
+    }
+
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::rbegin() const -> const_reverse_iterator
+    {
+        return const_reverse_iterator(end());
+    }
+
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::crend() const -> const_reverse_iterator
+    {
+        return rend();
+    }
+
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::crbegin() const -> const_reverse_iterator
+    {
+        return rbegin();
+    }
+
+    // First viewed element: element Start of the underlying sequence.
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::front() const -> const_reference
+    {
+        return *(m_sequence.begin() + Start);
+    }
+
+    // Last viewed element: the underlying back() for an open-ended view,
+    // otherwise element End - 1.
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    auto sequence_view<E, Start, End>::back() const -> const_reference
+    {
+        if (End == -1)
+        {
+            return m_sequence.back();
+        }
+        else
+        {
+            return m_sequence[static_cast<std::size_t>(End - 1)];
+        }
+    }
+
+    template <class E, std::ptrdiff_t Start, std::ptrdiff_t End>
+    const E& sequence_view<E, Start, End>::storage() const
+    {
+        return m_sequence;
+    }
+
+    // Equality compares the viewed ranges element-wise (only views with the
+    // same static bounds are comparable).
+    template <class T, std::ptrdiff_t TB, std::ptrdiff_t TE>
+    inline bool operator==(const sequence_view<T, TB, TE>& lhs, const sequence_view<T, TB, TE>& rhs)
+    {
+        return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+    }
+
+    template <class T, std::ptrdiff_t TB, std::ptrdiff_t TE>
+    inline bool operator!=(const sequence_view<T, TB, TE>& lhs, const sequence_view<T, TB, TE>& rhs)
+    {
+        return !(lhs == rhs);
+    }
+}
+
+/******************************
+ * std::tuple_size extensions *
+ ******************************/
+
+// The C++ standard defines tuple_size as a class, however
+// G++ 8 C++ library does define it as a struct hence we get
+// clang warnings here
+
+// Do not remove space between "#" and "pragma". This is required for CRAN checks.
+// clang-format off
+#if defined(__clang__)
+ # pragma clang diagnostic push
+ # pragma clang diagnostic ignored "-Wmismatched-tags"
+#endif
+// clang-format on
+
+namespace std
+{
+    template <class T, std::size_t N>
+    class tuple_size<xt::const_array<T, N>> : public integral_constant<std::size_t, N>
+    {
+    };
+
+    template <std::size_t... N>
+    class tuple_size<xt::fixed_shape<N...>> : public integral_constant<std::size_t, sizeof...(N)>
+    {
+    };
+
+    template <class T, std::ptrdiff_t Start, std::ptrdiff_t End>
+    class tuple_size<xt::sequence_view<T, Start, End>>
+        : public integral_constant<std::size_t, std::size_t(End - Start)>
+    {
+    };
+
+    // Undefine tuple size for not-known sequence view size
+    template <class T, std::ptrdiff_t Start>
+    class tuple_size<xt::sequence_view<T, Start, -1>>;
+}
+
+// Do not remove space between "#" and "pragma". This is required for CRAN checks.
+// clang-format off
+#if defined(__clang__)
+ # pragma clang diagnostic pop
+#endif
+// clang-format on
+
+#undef XTENSOR_CONST
+
+#endif

+ 921 - 0
3rd/numpy/include/xtensor/xstrided_view.hpp

@@ -0,0 +1,921 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_STRIDED_VIEW_HPP
+#define XTENSOR_STRIDED_VIEW_HPP
+
+#include <algorithm>
+#include <cstddef>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xsequence.hpp>
+#include <xtl/xvariant.hpp>
+
+#include "xexpression.hpp"
+#include "xiterable.hpp"
+#include "xlayout.hpp"
+#include "xsemantic.hpp"
+#include "xstorage.hpp"
+#include "xstrided_view_base.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+    /***************************
+     * xstrided_view extension *
+     ***************************/
+
+    namespace extension
+    {
+        template <class Tag, class CT, class S, layout_type L, class FST>
+        struct xstrided_view_base_impl;
+
+        template <class CT, class S, layout_type L, class FST>
+        struct xstrided_view_base_impl<xtensor_expression_tag, CT, S, L, FST>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class CT, class S, layout_type L, class FST>
+        struct xstrided_view_base : xstrided_view_base_impl<xexpression_tag_t<CT>, CT, S, L, FST>
+        {
+        };
+
+        template <class CT, class S, layout_type L, class FST>
+        using xstrided_view_base_t = typename xstrided_view_base<CT, S, L, FST>::type;
+    }
+
+    // Picks the iterable base for a view: when the view layout L1 matches the
+    // underlying layout L2 and is static (not dynamic), the contiguous iterable
+    // can be used; otherwise the generic xiterable is required.
+    template <layout_type L1, layout_type L2, class T>
+    struct select_iterable_base
+    {
+        using type = std::conditional_t<L1 == L2 && L1 != layout_type::dynamic, xcontiguous_iterable<T>, xiterable<T>>;
+    };
+
+    template <layout_type L1, layout_type L2, class T>
+    using select_iterable_base_t = typename select_iterable_base<L1, L2, T>::type;
+
+
+    template <class CT, class S, layout_type L, class FST>
+    class xstrided_view;
+
+    // Inner-type bundle consumed by the container/semantic CRTP bases of
+    // xstrided_view. FST (the flat storage getter) determines how the view
+    // reaches the underlying 1-D storage.
+    template <class CT, class S, layout_type L, class FST>
+    struct xcontainer_inner_types<xstrided_view<CT, S, L, FST>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using undecay_expression = CT;
+        using reference = inner_reference_t<undecay_expression>;
+        using const_reference = typename xexpression_type::const_reference;
+        using size_type = typename xexpression_type::size_type;
+        using shape_type = std::decay_t<S>;
+        using undecay_shape = S;
+        using storage_getter = FST;
+        using inner_storage_type = typename storage_getter::type;
+        // Temporary used by the assignment machinery: a concrete container
+        // whose kind is chosen from the shape type S.
+        using temporary_type = typename detail::xtype_for_shape<
+            S>::template type<typename xexpression_type::value_type, xexpression_type::static_layout>;
+        using storage_type = std::remove_reference_t<inner_storage_type>;
+        static constexpr layout_type layout = L;
+    };
+
+    // Iterable-type bundle: steppers fall back to indexed steppers when the
+    // wrapped expression itself only provides an indexed stepper.
+    template <class CT, class S, layout_type L, class FST>
+    struct xiterable_inner_types<xstrided_view<CT, S, L, FST>>
+    {
+        using inner_shape_type = std::decay_t<S>;
+        using inner_strides_type = get_strides_t<inner_shape_type>;
+        // NOTE(review): the double "type" in this alias name looks like a typo
+        // kept for compatibility — confirm against the consumers of this trait.
+        using inner_backstrides_type_type = inner_strides_type;
+
+        using const_stepper = std::conditional_t<
+            is_indexed_stepper<typename std::decay_t<CT>::stepper>::value,
+            xindexed_stepper<const xstrided_view<CT, S, L, FST>, true>,
+            xstepper<const xstrided_view<CT, S, L, FST>>>;
+
+        using stepper = std::conditional_t<
+            is_indexed_stepper<typename std::decay_t<CT>::stepper>::value,
+            xindexed_stepper<xstrided_view<CT, S, L, FST>, false>,
+            xstepper<xstrided_view<CT, S, L, FST>>>;
+    };
+
+    // Assignability of the view is delegated to the wrapped closure type.
+    template <class CT, class S, layout_type L, class FST, class RHS>
+    struct can_assign<xstrided_view<CT, S, L, FST>, RHS> : can_assign<CT, RHS>
+    {
+    };
+
+    /*****************
+     * xstrided_view *
+     *****************/
+
+    /**
+     * @class xstrided_view
+     * @brief View of an xexpression using strides
+     *
+     * The xstrided_view class implements a view utilizing an initial offset
+     * and strides.
+     *
+     * @tparam CT the closure type of the \ref xexpression type underlying this view
+     * @tparam L the layout of the strided view
+     * @tparam S the strides type of the strided view
+     * @tparam FST the flat storage type used for the strided view
+     *
+     * @sa strided_view, transpose
+     */
+    template <class CT, class S, layout_type L = layout_type::dynamic, class FST = detail::flat_storage_getter<CT, XTENSOR_DEFAULT_TRAVERSAL>>
+    class xstrided_view
+        : public xview_semantic<xstrided_view<CT, S, L, FST>>,
+          public select_iterable_base_t<L, std::decay_t<CT>::static_layout, xstrided_view<CT, S, L, FST>>,
+          private xstrided_view_base<xstrided_view<CT, S, L, FST>>,
+          public extension::xstrided_view_base_t<CT, S, L, FST>
+    {
+    public:
+
+        using self_type = xstrided_view<CT, S, L, FST>;
+        using base_type = xstrided_view_base<self_type>;
+        using semantic_base = xview_semantic<self_type>;
+        using extension_base = extension::xstrided_view_base_t<CT, S, L, FST>;
+        using expression_tag = typename extension_base::expression_tag;
+
+        using xexpression_type = typename base_type::xexpression_type;
+        using base_type::is_const;
+
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using const_reference = typename base_type::const_reference;
+        using pointer = typename base_type::pointer;
+        using const_pointer = typename base_type::const_pointer;
+        using size_type = typename base_type::size_type;
+        using difference_type = typename base_type::difference_type;
+
+        using inner_storage_type = typename base_type::inner_storage_type;
+        using storage_type = typename base_type::storage_type;
+        using linear_iterator = typename storage_type::iterator;
+        using const_linear_iterator = typename storage_type::const_iterator;
+        using reverse_linear_iterator = std::reverse_iterator<linear_iterator>;
+        using const_reverse_linear_iterator = std::reverse_iterator<const_linear_iterator>;
+
+        using iterable_base = select_iterable_base_t<L, xexpression_type::static_layout, self_type>;
+        using inner_shape_type = typename base_type::inner_shape_type;
+        using inner_strides_type = typename base_type::inner_strides_type;
+        using inner_backstrides_type = typename base_type::inner_backstrides_type;
+        using shape_type = typename base_type::shape_type;
+        using strides_type = typename base_type::strides_type;
+        using backstrides_type = typename base_type::backstrides_type;
+
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+
+        using base_type::contiguous_layout;
+        using base_type::static_layout;
+
+        using temporary_type = typename xcontainer_inner_types<self_type>::temporary_type;
+        using base_index_type = xindex_type_t<shape_type>;
+
+        using data_alignment = xt_simd::container_alignment_t<storage_type>;
+        using simd_type = xt_simd::simd_type<value_type>;
+        using simd_value_type = xt_simd::simd_type<value_type>;
+        using bool_load_type = typename base_type::bool_load_type;
+
+        template <class CTA, class SA>
+        xstrided_view(CTA&& e, SA&& shape, strides_type&& strides, std::size_t offset, layout_type layout) noexcept;
+
+        xstrided_view(const xstrided_view& rhs) = default;
+
+        self_type& operator=(const self_type&);
+
+        template <class E>
+        self_type& operator=(const xexpression<E>& e);
+
+        template <class E>
+        disable_xexpression<E, self_type>& operator=(const E& e);
+
+        using base_type::backstrides;
+        using base_type::dimension;
+        using base_type::is_contiguous;
+        using base_type::layout;
+        using base_type::shape;
+        using base_type::size;
+        using base_type::strides;
+
+        using base_type::operator();
+        using base_type::at;
+        using base_type::unchecked;
+        using base_type::operator[];
+        using base_type::data;
+        using base_type::data_offset;
+        using base_type::element;
+        using base_type::expression;
+        using base_type::storage;
+
+        using base_type::broadcast_shape;
+        using base_type::has_linear_assign;
+
+        template <class T>
+        void fill(const T& value);
+
+        linear_iterator linear_begin();
+        linear_iterator linear_end();
+        const_linear_iterator linear_begin() const;
+        const_linear_iterator linear_end() const;
+        const_linear_iterator linear_cbegin() const;
+        const_linear_iterator linear_cend() const;
+
+        reverse_linear_iterator linear_rbegin();
+        reverse_linear_iterator linear_rend();
+        const_reverse_linear_iterator linear_rbegin() const;
+        const_reverse_linear_iterator linear_rend() const;
+        const_reverse_linear_iterator linear_crbegin() const;
+        const_reverse_linear_iterator linear_crend() const;
+
+        template <class ST, class STEP = stepper>
+        disable_indexed_stepper_t<STEP> stepper_begin(const ST& shape);
+        template <class ST, class STEP = stepper>
+        disable_indexed_stepper_t<STEP> stepper_end(const ST& shape, layout_type l);
+
+        template <class ST, class STEP = stepper>
+        enable_indexed_stepper_t<STEP> stepper_begin(const ST& shape);
+        template <class ST, class STEP = stepper>
+        enable_indexed_stepper_t<STEP> stepper_end(const ST& shape, layout_type l);
+
+        template <class ST, class STEP = const_stepper>
+        disable_indexed_stepper_t<STEP> stepper_begin(const ST& shape) const;
+        template <class ST, class STEP = const_stepper>
+        disable_indexed_stepper_t<STEP> stepper_end(const ST& shape, layout_type l) const;
+
+        template <class ST, class STEP = const_stepper>
+        enable_indexed_stepper_t<STEP> stepper_begin(const ST& shape) const;
+        template <class ST, class STEP = const_stepper>
+        enable_indexed_stepper_t<STEP> stepper_end(const ST& shape, layout_type l) const;
+
+        template <class requested_type>
+        using simd_return_type = xt_simd::simd_return_type<value_type, requested_type>;
+
+        template <class T, class R>
+        using enable_simd_interface = std::enable_if_t<has_simd_interface<T>::value && L != layout_type::dynamic, R>;
+
+        template <class align, class simd, class T = xexpression_type>
+        enable_simd_interface<T, void> store_simd(size_type i, const simd& e);
+        template <
+            class align,
+            class requested_type = value_type,
+            std::size_t N = xt_simd::simd_traits<requested_type>::size,
+            class T = xexpression_type>
+        enable_simd_interface<T, simd_return_type<requested_type>> load_simd(size_type i) const;
+
+        reference data_element(size_type i);
+        const_reference data_element(size_type i) const;
+
+        reference flat(size_type i);
+        const_reference flat(size_type i) const;
+
+        using container_iterator = std::
+            conditional_t<is_const, typename storage_type::const_iterator, typename storage_type::iterator>;
+        using const_container_iterator = typename storage_type::const_iterator;
+
+        template <class E>
+        using rebind_t = xstrided_view<E, S, L, typename FST::template rebind_t<E>>;
+
+        template <class E>
+        rebind_t<E> build_view(E&& e) const;
+
+    private:
+
+        container_iterator data_xbegin() noexcept;
+        const_container_iterator data_xbegin() const noexcept;
+        container_iterator data_xend(layout_type l, size_type offset) noexcept;
+        const_container_iterator data_xend(layout_type l, size_type offset) const noexcept;
+
+        template <class It>
+        It data_xbegin_impl(It begin) const noexcept;
+
+        template <class It>
+        It data_xend_impl(It end, layout_type l, size_type offset) const noexcept;
+
+        void assign_temporary_impl(temporary_type&& tmp);
+
+        using base_type::set_offset;
+
+        template <class C>
+        friend class xstepper;
+        friend class xview_semantic<self_type>;
+        friend class xaccessible<self_type>;
+        friend class xconst_accessible<self_type>;
+        template <class D>
+        friend class xaxis_iterator;
+        template <class D>
+        friend class xaxis_slice_iterator;
+    };
+
+    /**************************
+     * xstrided_view builders *
+     **************************/
+
+    template <class T>
+    using xstrided_slice = xtl::variant<
+        T,
+
+        xrange_adaptor<placeholders::xtuph, T, T>,
+        xrange_adaptor<T, placeholders::xtuph, T>,
+        xrange_adaptor<T, T, placeholders::xtuph>,
+
+        xrange_adaptor<T, placeholders::xtuph, placeholders::xtuph>,
+        xrange_adaptor<placeholders::xtuph, T, placeholders::xtuph>,
+        xrange_adaptor<placeholders::xtuph, placeholders::xtuph, T>,
+
+        xrange_adaptor<T, T, T>,
+        xrange_adaptor<placeholders::xtuph, placeholders::xtuph, placeholders::xtuph>,
+
+        xrange<T>,
+        xstepped_range<T>,
+
+        xall_tag,
+        xellipsis_tag,
+        xnewaxis_tag>;
+
+    /**
+     * @typedef xstrided_slice_vector
+     * @brief vector of slices used to build a `xstrided_view`
+     */
+    using xstrided_slice_vector = std::vector<xstrided_slice<std::ptrdiff_t>>;
+
+    template <layout_type L = layout_type::dynamic, class E, class S, class X>
+    auto strided_view(E&& e, S&& shape, X&& stride, std::size_t offset = 0, layout_type layout = L) noexcept;
+
+    template <class E>
+    auto strided_view(E&& e, const xstrided_slice_vector& slices);
+
+    /********************************
+     * xstrided_view implementation *
+     ********************************/
+
+    /**
+     * @name Constructor
+     */
+    //@{
+    /**
+     * Constructs an xstrided_view
+     *
+     * @param e the underlying xexpression for this view
+     * @param shape the shape of the view
+     * @param strides the strides of the view
+     * @param offset the offset of the first element in the underlying container
+     * @param layout the layout of the view
+     */
+    template <class CT, class S, layout_type L, class FST>
+    template <class CTA, class SA>
+    inline xstrided_view<CT, S, L, FST>::xstrided_view(
+        CTA&& e,
+        SA&& shape,
+        strides_type&& strides,
+        std::size_t offset,
+        layout_type layout
+    ) noexcept
+        : base_type(std::forward<CTA>(e), std::forward<SA>(shape), std::move(strides), offset, layout)
+    {
+    }
+
+    //@}
+
+    // Copy assignment evaluates the right-hand side into a concrete temporary
+    // first and then assigns it — presumably to be safe when the two views
+    // alias the same underlying storage.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::operator=(const self_type& rhs) -> self_type&
+    {
+        temporary_type tmp(rhs);
+        return this->assign_temporary(std::move(tmp));
+    }
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended assignment operator.
+     */
+    template <class CT, class S, layout_type L, class FST>
+    template <class E>
+    inline auto xstrided_view<CT, S, L, FST>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        // Delegates to the semantic base, which assigns the expression to
+        // this view element-wise.
+        return semantic_base::operator=(e);
+    }
+
+    //@}
+
+    // Scalar assignment: any rhs that is not an xexpression fills the whole
+    // view with its value (SFINAE via disable_xexpression).
+    template <class CT, class S, layout_type L, class FST>
+    template <class E>
+    inline auto xstrided_view<CT, S, L, FST>::operator=(const E& e) -> disable_xexpression<E, self_type>&
+    {
+        this->fill(e);
+        return *this;
+    }
+
+    namespace xstrided_view_detail
+    {
+        // Tag-dispatched helpers used by assign_temporary_impl below.
+
+        // Fast path: delegate to the strided loop assigner.
+        template <class V, class T>
+        inline void run_assign_temporary_impl(V& v, const T& t, std::true_type /* enable strided assign */)
+        {
+            strided_loop_assigner<true>::run(v, t);
+        }
+
+        // Fallback: plain element-wise iterator copy.
+        template <class V, class T>
+        inline void
+        run_assign_temporary_impl(V& v, const T& t, std::false_type /* fallback to iterator assign */)
+        {
+            std::copy(t.cbegin(), t.cend(), v.begin());
+        }
+    }
+
+    // Assigns a temporary container to this view, choosing at compile time
+    // between the SIMD strided assign and the iterator-based fallback.
+    template <class CT, class S, layout_type L, class FST>
+    inline void xstrided_view<CT, S, L, FST>::assign_temporary_impl(temporary_type&& tmp)
+    {
+        constexpr bool
+            fast_assign = xassign_traits<xstrided_view<CT, S, L, FST>, temporary_type>::simd_strided_assign();
+        xstrided_view_detail::run_assign_temporary_impl(*this, tmp, std::integral_constant<bool, fast_assign>{});
+    }
+
+    /**
+     * @name Data
+     */
+    //@{
+
+    /**
+     * Fills the view with the given value.
+     * @param value the value to fill the view with.
+     */
+    template <class CT, class S, layout_type L, class FST>
+    template <class T>
+    inline void xstrided_view<CT, S, L, FST>::fill(const T& value)
+    {
+        if (layout() != layout_type::dynamic)
+        {
+            // Non-dynamic layout: fill through the linear (storage) iterators.
+            std::fill(this->linear_begin(), this->linear_end(), value);
+        }
+        else
+        {
+            // Dynamic layout: fall back to the shaped iterators.
+            std::fill(this->begin(), this->end(), value);
+        }
+    }
+
+    //@}
+
+    // Unchecked access to the i-th element of the underlying storage. The
+    // index is a raw storage index, not a view index.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::data_element(size_type i) -> reference
+    {
+        return storage()[i];
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::data_element(size_type i) const -> const_reference
+    {
+        return storage()[i];
+    }
+
+    // flat(i) behaves exactly like data_element(i) for this view type.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::flat(size_type i) -> reference
+    {
+        return storage()[i];
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::flat(size_type i) const -> const_reference
+    {
+        return storage()[i];
+    }
+
+    /***************************
+     * linear iterator support *
+     ***************************/
+
+    // The linear iterators walk the underlying storage directly, shifted by
+    // data_offset(); fill() above only uses them when the layout is not
+    // dynamic.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_begin() -> linear_iterator
+    {
+        return this->storage().begin() + static_cast<std::ptrdiff_t>(data_offset());
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_end() -> linear_iterator
+    {
+        return this->storage().begin() + static_cast<std::ptrdiff_t>(data_offset() + size());
+    }
+
+    // Const overloads simply forward to the c-prefixed versions.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_begin() const -> const_linear_iterator
+    {
+        return this->linear_cbegin();
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_end() const -> const_linear_iterator
+    {
+        return this->linear_cend();
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_cbegin() const -> const_linear_iterator
+    {
+        return this->storage().cbegin() + static_cast<std::ptrdiff_t>(data_offset());
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_cend() const -> const_linear_iterator
+    {
+        return this->storage().cbegin() + static_cast<std::ptrdiff_t>(data_offset() + size());
+    }
+
+    // Reverse linear iterators are adaptors over the forward ones.
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_rbegin() -> reverse_linear_iterator
+    {
+        return reverse_linear_iterator(this->linear_begin());
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_rend() -> reverse_linear_iterator
+    {
+        return reverse_linear_iterator(this->linear_end());
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_rbegin() const -> const_reverse_linear_iterator
+    {
+        return this->linear_crbegin();
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_rend() const -> const_reverse_linear_iterator
+    {
+        return this->linear_crend();
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_crbegin() const -> const_reverse_linear_iterator
+    {
+        return const_reverse_linear_iterator(this->linear_cbegin());
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::linear_crend() const -> const_reverse_linear_iterator
+    {
+        return const_reverse_linear_iterator(this->linear_cend());
+    }
+
+    /***************
+     * stepper api *
+     ***************/
+
+    // For every stepper factory below, `offset` is the number of leading
+    // broadcast dimensions (shape.size() - dimension()) of the requested
+    // shape. Indexed steppers receive the offset only; pointer-based
+    // steppers additionally get an iterator into the raw storage.
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST, class STEP>
+    inline auto xstrided_view<CT, S, L, FST>::stepper_begin(const ST& shape) -> disable_indexed_stepper_t<STEP>
+    {
+        size_type offset = shape.size() - dimension();
+        return stepper(this, data_xbegin(), offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST, class STEP>
+    inline auto xstrided_view<CT, S, L, FST>::stepper_end(const ST& shape, layout_type l)
+        -> disable_indexed_stepper_t<STEP>
+    {
+        size_type offset = shape.size() - dimension();
+        return stepper(this, data_xend(l, offset), offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST, class STEP>
+    inline auto xstrided_view<CT, S, L, FST>::stepper_begin(const ST& shape) -> enable_indexed_stepper_t<STEP>
+    {
+        size_type offset = shape.size() - dimension();
+        return stepper(this, offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST, class STEP>
+    inline auto xstrided_view<CT, S, L, FST>::stepper_end(const ST& shape, layout_type /*l*/)
+        -> enable_indexed_stepper_t<STEP>
+    {
+        size_type offset = shape.size() - dimension();
+        // `true` flags an end-of-range indexed stepper.
+        return stepper(this, offset, true);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST, class STEP>
+    inline auto xstrided_view<CT, S, L, FST>::stepper_begin(const ST& shape) const
+        -> disable_indexed_stepper_t<STEP>
+    {
+        size_type offset = shape.size() - dimension();
+        return const_stepper(this, data_xbegin(), offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST, class STEP>
+    inline auto xstrided_view<CT, S, L, FST>::stepper_end(const ST& shape, layout_type l) const
+        -> disable_indexed_stepper_t<STEP>
+    {
+        size_type offset = shape.size() - dimension();
+        return const_stepper(this, data_xend(l, offset), offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST, class STEP>
+    inline auto xstrided_view<CT, S, L, FST>::stepper_begin(const ST& shape) const
+        -> enable_indexed_stepper_t<STEP>
+    {
+        size_type offset = shape.size() - dimension();
+        return const_stepper(this, offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class ST, class STEP>
+    inline auto xstrided_view<CT, S, L, FST>::stepper_end(const ST& shape, layout_type /*l*/) const
+        -> enable_indexed_stepper_t<STEP>
+    {
+        size_type offset = shape.size() - dimension();
+        return const_stepper(this, offset, true);
+    }
+
+    // Helpers producing iterators into the raw storage shifted by the view's
+    // data offset; data_xend additionally accounts for the traversal layout
+    // via strided_data_end.
+    template <class CT, class S, layout_type L, class FST>
+    template <class It>
+    inline It xstrided_view<CT, S, L, FST>::data_xbegin_impl(It begin) const noexcept
+    {
+        return begin + static_cast<std::ptrdiff_t>(this->data_offset());
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    template <class It>
+    inline It
+    xstrided_view<CT, S, L, FST>::data_xend_impl(It begin, layout_type l, size_type offset) const noexcept
+    {
+        return strided_data_end(*this, begin + std::ptrdiff_t(this->data_offset()), l, offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::data_xbegin() noexcept -> container_iterator
+    {
+        return data_xbegin_impl(this->storage().begin());
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::data_xbegin() const noexcept -> const_container_iterator
+    {
+        return data_xbegin_impl(this->storage().cbegin());
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::data_xend(layout_type l, size_type offset) noexcept
+        -> container_iterator
+    {
+        return data_xend_impl(this->storage().begin(), l, offset);
+    }
+
+    template <class CT, class S, layout_type L, class FST>
+    inline auto xstrided_view<CT, S, L, FST>::data_xend(layout_type l, size_type offset) const noexcept
+        -> const_container_iterator
+    {
+        return data_xend_impl(this->storage().cbegin(), l, offset);
+    }
+
+    // Stores a SIMD batch `e` at raw storage index i. The effective alignment
+    // is derived from the requested alignment and the storage's own data
+    // alignment.
+    template <class CT, class S, layout_type L, class FST>
+    template <class alignment, class simd, class T>
+    inline auto xstrided_view<CT, S, L, FST>::store_simd(size_type i, const simd& e)
+        -> enable_simd_interface<T, void>
+    {
+        using align_mode = driven_align_mode_t<alignment, data_alignment>;
+        xt_simd::store_as(&(storage()[i]), e, align_mode());
+    }
+
+    // Loads a SIMD batch of `requested_type` starting at raw storage index i.
+    template <class CT, class S, layout_type L, class FST>
+    template <class alignment, class requested_type, std::size_t N, class T>
+    inline auto xstrided_view<CT, S, L, FST>::load_simd(size_type i) const
+        -> enable_simd_interface<T, simd_return_type<requested_type>>
+    {
+        using align_mode = driven_align_mode_t<alignment, data_alignment>;
+        return xt_simd::load_as<requested_type>(&(storage()[i]), align_mode());
+    }
+
+    // Rebinds this view onto another expression `e`, duplicating the current
+    // shape, strides, offset and layout.
+    template <class CT, class S, layout_type L, class FST>
+    template <class E>
+    inline auto xstrided_view<CT, S, L, FST>::build_view(E&& e) const -> rebind_t<E>
+    {
+        inner_shape_type sh(this->shape());
+        inner_strides_type str(this->strides());
+        return rebind_t<E>(
+            std::forward<E>(e),
+            std::move(sh),
+            std::move(str),
+            base_type::data_offset(),
+            this->layout()
+        );
+    }
+
+    /*****************************************
+     * xstrided_view builders implementation *
+     *****************************************/
+
+    /**
+     * Construct a strided view from an xexpression, shape, strides and offset.
+     *
+     * @param e xexpression
+     * @param shape the shape of the view
+     * @param strides the new strides of the view
+     * @param offset the offset of the first element in the underlying container
+     * @param layout the new layout of the expression
+     *
+     * @tparam L the static layout type of the view (default: dynamic)
+     * @tparam E type of xexpression
+     * @tparam S strides type
+     * @tparam X strides type
+     *
+     * @return the view
+     */
+    template <layout_type L, class E, class S, class X>
+    inline auto strided_view(E&& e, S&& shape, X&& strides, std::size_t offset, layout_type layout) noexcept
+    {
+        // The arguments are forwarded as-is; no consistency check against
+        // e's shape is performed here.
+        using view_type = xstrided_view<xclosure_t<E>, S, L>;
+        return view_type(std::forward<E>(e), std::forward<S>(shape), std::forward<X>(strides), offset, layout);
+    }
+
+    namespace detail
+    {
+        // Policy used with strided_view_args when no stride adjustment is
+        // required: all hooks are no-ops and fill_args never consumes a slice.
+        struct no_adj_strides_policy
+        {
+        protected:
+
+            inline void resize(std::size_t)
+            {
+            }
+
+            inline void set_fake_slice(std::size_t)
+            {
+            }
+
+            // Returns false to signal that this policy did not handle the
+            // slice, letting the generic machinery process it.
+            template <class ST, class S>
+            bool fill_args(
+                const xstrided_slice_vector& /*slices*/,
+                std::size_t /*sl_idx*/,
+                std::size_t /*i*/,
+                std::size_t /*old_shape*/,
+                const ST& /*old_stride*/,
+                S& /*shape*/,
+                get_strides_t<S>& /*strides*/
+            )
+            {
+                return false;
+            }
+        };
+    }
+
+    /**
+     * Function to create a dynamic view from
+     * an xexpression and an xstrided_slice_vector.
+     *
+     * @param e xexpression
+     * @param slices the slice vector
+     *
+     * @return initialized strided_view according to slices
+     *
+     * @code{.cpp}
+     * xt::xarray<double> a = {{1, 2, 3}, {4, 5, 6}};
+     * xt::xstrided_slice_vector sv({xt::range(0, 1)});
+     * sv.push_back(xt::range(0, 3, 2));
+     * auto v = xt::strided_view(a, sv);
+     * // ==> {{1, 3}}
+     * @endcode
+     *
+     * You can also achieve the same with the following short-hand syntax:
+     *
+     * @code{.cpp}
+     * xt::xarray<double> a = {{1, 2, 3}, {4, 5, 6}};
+     * auto v = xt::strided_view(a, {xt::range(0, 1), xt::range(0, 3, 2)});
+     * // ==> {{1, 3}}
+     * @endcode
+     */
+    template <class E>
+    inline auto strided_view(E&& e, const xstrided_slice_vector& slices)
+    {
+        // Compute the new shape, strides, offset and layout from the slice
+        // vector, then build the view from those.
+        detail::strided_view_args<detail::no_adj_strides_policy> args;
+        args.fill_args(
+            e.shape(),
+            detail::get_strides<XTENSOR_DEFAULT_TRAVERSAL>(e),
+            detail::get_offset<XTENSOR_DEFAULT_TRAVERSAL>(e),
+            e.layout(),
+            slices
+        );
+        using view_type = xstrided_view<xclosure_t<E>, decltype(args.new_shape)>;
+        return view_type(
+            std::forward<E>(e),
+            std::move(args.new_shape),
+            std::move(args.new_strides),
+            args.new_offset,
+            args.new_layout
+        );
+    }
+
+    namespace detail
+    {
+        // Maps a shape container type to its size_t-valued counterpart;
+        // fixed shapes are kept unchanged.
+        template <typename S>
+        struct rebind_shape;
+
+        template <std::size_t... X>
+        struct rebind_shape<xt::fixed_shape<X...>>
+        {
+            using type = xt::fixed_shape<X...>;
+        };
+
+        template <class S>
+        struct rebind_shape
+        {
+            using type = rebind_container_t<size_t, S>;
+        };
+
+        // Replaces a single -1 placeholder entry in a signed shape with the
+        // dimension deduced from the total size.
+        template <
+            class S,
+            std::enable_if_t<std::is_signed<get_value_type_t<typename std::decay<S>::type>>::value, bool> = true>
+        inline void recalculate_shape_impl(S& shape, size_t size)
+        {
+            using value_type = get_value_type_t<typename std::decay_t<S>>;
+            XTENSOR_ASSERT(std::count(shape.cbegin(), shape.cend(), -1) <= 1);
+            auto iter = std::find(shape.begin(), shape.end(), -1);
+            if (iter != std::end(shape))
+            {
+                // Seeding with -1 cancels the single -1 entry in the product,
+                // so `total` is the product of the known extents.
+                // NOTE(review): accumulates in int — may overflow for very
+                // large shapes; confirm acceptable.
+                const auto total = std::accumulate(shape.cbegin(), shape.cend(), -1, std::multiplies<int>{});
+                const auto missing_dimension = size / total;
+                (*iter) = static_cast<value_type>(missing_dimension);
+            }
+        }
+
+        // Unsigned shapes cannot carry a -1 placeholder: nothing to do.
+        template <
+            class S,
+            std::enable_if_t<!std::is_signed<get_value_type_t<typename std::decay<S>::type>>::value, bool> = true>
+        inline void recalculate_shape_impl(S&, size_t)
+        {
+        }
+
+        template <class S>
+        inline auto recalculate_shape(S&& shape, size_t size)
+        {
+            return recalculate_shape_impl(shape, size);
+        }
+    }
+
+    /**
+     * @brief Return a view on an expression with a new shape.
+     *
+     * @param e xexpression to reshape
+     * @param shape the new shape; for signed shape value types a single -1
+     *        entry is deduced from e.size() (see detail::recalculate_shape)
+     * @tparam L traversal order used to compute the strides of the view;
+     *        must be row_major or column_major
+     *
+     * @return strided view on the expression with the new shape
+     */
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E, class S>
+    inline auto reshape_view(E&& e, S&& shape)
+    {
+        static_assert(
+            L == layout_type::row_major || L == layout_type::column_major,
+            "traversal has to be row or column major"
+        );
+
+        using shape_type = std::decay_t<decltype(shape)>;
+        using unsigned_shape_type = typename detail::rebind_shape<shape_type>::type;
+        get_strides_t<unsigned_shape_type> strides;
+
+        // Resolve a possible -1 entry in place before computing the strides.
+        detail::recalculate_shape(shape, e.size());
+        xt::resize_container(strides, shape.size());
+        compute_strides(shape, L, strides);
+        // Static layout is only kept when it matches the requested traversal.
+        constexpr auto computed_layout = std::decay_t<E>::static_layout == L ? L : layout_type::dynamic;
+        using view_type = xstrided_view<
+            xclosure_t<E>,
+            unsigned_shape_type,
+            computed_layout,
+            detail::flat_adaptor_getter<xclosure_t<E>, L>>;
+        return view_type(
+            std::forward<E>(e),
+            xtl::forward_sequence<unsigned_shape_type, S>(shape),
+            std::move(strides),
+            0,
+            e.layout()
+        );
+    }
+
+    /**
+     * @deprecated
+     * @brief Return a view on a container with a new shape
+     *
+     * Note: if you resize the underlying container, this view becomes
+     * invalidated.
+     *
+     * @param e xexpression to reshape
+     * @param shape new shape
+     * @param order traversal order (optional)
+     *
+     * @return view on xexpression with new shape
+     */
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E, class S>
+    inline auto reshape_view(E&& e, S&& shape, layout_type /*order*/)
+    {
+        // The runtime order argument is ignored; L drives the traversal.
+        return reshape_view<L>(std::forward<E>(e), std::forward<S>(shape));
+    }
+
+    // Overloads accepting a braced shape literal, e.g. reshape_view(e, {2, 3}).
+    // NOTE(review): this deprecated overload converts the literal to
+    // std::array<std::size_t, N> while the overload below keeps element type
+    // I — a -1 placeholder would not survive the unsigned conversion here;
+    // confirm this asymmetry is intended.
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E, class I, std::size_t N>
+    inline auto reshape_view(E&& e, const I (&shape)[N], layout_type order)
+    {
+        using shape_type = std::array<std::size_t, N>;
+        return reshape_view<L>(std::forward<E>(e), xtl::forward_sequence<shape_type, decltype(shape)>(shape), order);
+    }
+
+    template <layout_type L = XTENSOR_DEFAULT_TRAVERSAL, class E, class I, std::size_t N>
+    inline auto reshape_view(E&& e, const I (&shape)[N])
+    {
+        using shape_type = std::array<I, N>;
+        return reshape_view<L>(std::forward<E>(e), xtl::forward_sequence<shape_type, decltype(shape)>(shape));
+    }
+}
+
+#endif

+ 970 - 0
3rd/numpy/include/xtensor/xstrided_view_base.hpp

@@ -0,0 +1,970 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_STRIDED_VIEW_BASE_HPP
+#define XTENSOR_STRIDED_VIEW_BASE_HPP
+
+#include <type_traits>
+
+#include <xtl/xsequence.hpp>
+#include <xtl/xvariant.hpp>
+
+#include "xaccessible.hpp"
+#include "xslice.hpp"
+#include "xstrides.hpp"
+#include "xtensor_config.hpp"
+#include "xtensor_forward.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+    namespace detail
+    {
+        // Adapts an expression that has no flat data interface so it can be
+        // used as the "storage" of a strided view: exposes size(),
+        // operator[] and L-ordered iterators over the expression.
+        template <class CT, layout_type L>
+        class flat_expression_adaptor
+        {
+        public:
+
+            using xexpression_type = std::decay_t<CT>;
+            using shape_type = typename xexpression_type::shape_type;
+            using inner_strides_type = get_strides_t<shape_type>;
+            using index_type = inner_strides_type;
+            using size_type = typename xexpression_type::size_type;
+            using value_type = typename xexpression_type::value_type;
+            using const_reference = typename xexpression_type::const_reference;
+            // Mutable references are only exposed when CT is non-const.
+            using reference = std::conditional_t<
+                std::is_const<std::remove_reference_t<CT>>::value,
+                typename xexpression_type::const_reference,
+                typename xexpression_type::reference>;
+
+            // Iterator types follow the expression's layout-templated iterators.
+            using iterator = decltype(std::declval<std::remove_reference_t<CT>>().template begin<L>());
+            using const_iterator = decltype(std::declval<std::decay_t<CT>>().template cbegin<L>());
+            using reverse_iterator = decltype(std::declval<std::remove_reference_t<CT>>().template rbegin<L>());
+            using const_reverse_iterator = decltype(std::declval<std::decay_t<CT>>().template crbegin<L>());
+
+            explicit flat_expression_adaptor(CT* e);
+
+            template <class FST>
+            flat_expression_adaptor(CT* e, FST&& strides);
+
+            // Re-points the adaptor at a new expression (used after the
+            // owning view is copied or moved).
+            void update_pointer(CT* ptr) const;
+
+            size_type size() const;
+            reference operator[](size_type idx);
+            const_reference operator[](size_type idx) const;
+
+            iterator begin();
+            iterator end();
+            const_iterator begin() const;
+            const_iterator end() const;
+            const_iterator cbegin() const;
+            const_iterator cend() const;
+
+        private:
+
+            // Scratch index buffer; definition not visible in this header
+            // chunk — presumably shared per call site, TODO confirm.
+            static index_type& get_index();
+
+            // mutable so update_pointer can be const-qualified.
+            mutable CT* m_e;
+            inner_strides_type m_strides;
+            size_type m_size;
+        };
+
+        // Trait detecting whether a storage type is a flat_expression_adaptor.
+        template <class T>
+        struct is_flat_expression_adaptor : std::false_type
+        {
+        };
+
+        template <class CT, layout_type L>
+        struct is_flat_expression_adaptor<flat_expression_adaptor<CT, L>> : std::true_type
+        {
+        };
+
+        // True when E has a data interface AND the storage type is not an
+        // adaptor, i.e. raw data pointers can be exposed.
+        template <class E, class ST>
+        struct provides_data_interface
+            : xtl::conjunction<has_data_interface<std::decay_t<E>>, xtl::negation<is_flat_expression_adaptor<ST>>>
+        {
+        };
+    }
+
+    /**
+     * @class xstrided_view_base
+     * @brief CRTP base class for views described by a shape, a strides
+     * vector and an offset into the storage of an underlying expression.
+     *
+     * @tparam D the derived view type (CRTP).
+     */
+    template <class D>
+    class xstrided_view_base : public xaccessible<D>
+    {
+    public:
+
+        using base_type = xaccessible<D>;
+        using inner_types = xcontainer_inner_types<D>;
+        using xexpression_type = typename inner_types::xexpression_type;
+        using undecay_expression = typename inner_types::undecay_expression;
+        static constexpr bool is_const = std::is_const<std::remove_reference_t<undecay_expression>>::value;
+
+        using value_type = typename xexpression_type::value_type;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+        using pointer = std::
+            conditional_t<is_const, typename xexpression_type::const_pointer, typename xexpression_type::pointer>;
+        using const_pointer = typename xexpression_type::const_pointer;
+        using size_type = typename inner_types::size_type;
+        using difference_type = typename xexpression_type::difference_type;
+
+        using storage_getter = typename inner_types::storage_getter;
+        using inner_storage_type = typename inner_types::inner_storage_type;
+        using storage_type = std::remove_reference_t<inner_storage_type>;
+
+        using shape_type = typename inner_types::shape_type;
+        using strides_type = get_strides_t<shape_type>;
+        using backstrides_type = strides_type;
+
+        using inner_shape_type = shape_type;
+        using inner_strides_type = strides_type;
+        using inner_backstrides_type = backstrides_type;
+
+        using undecay_shape = typename inner_types::undecay_shape;
+
+        using simd_value_type = xt_simd::simd_type<value_type>;
+        using bool_load_type = typename xexpression_type::bool_load_type;
+
+        // A dynamic layout is never considered contiguous.
+        static constexpr layout_type static_layout = inner_types::layout;
+        static constexpr bool contiguous_layout = static_layout != layout_type::dynamic
+                                                  && xexpression_type::contiguous_layout;
+
+        // Construction / special members
+        template <class CTA, class SA>
+        xstrided_view_base(CTA&& e, SA&& shape, strides_type&& strides, size_type offset, layout_type layout) noexcept;
+
+        xstrided_view_base(xstrided_view_base&& rhs);
+
+        xstrided_view_base(const xstrided_view_base& rhs);
+
+        // Shape / strides / layout queries
+        const inner_shape_type& shape() const noexcept;
+        const inner_strides_type& strides() const noexcept;
+        const inner_backstrides_type& backstrides() const noexcept;
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+        using base_type::shape;
+
+        // Element access
+        reference operator()();
+        const_reference operator()() const;
+
+        template <class... Args>
+        reference operator()(Args... args);
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+
+        template <class... Args>
+        reference unchecked(Args... args);
+
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+
+        template <class It>
+        reference element(It first, It last);
+
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        // Storage / raw data interface; data() is only available when the
+        // underlying expression actually provides a data interface.
+        storage_type& storage() noexcept;
+        const storage_type& storage() const noexcept;
+
+        template <class E = xexpression_type, class ST = storage_type>
+        std::enable_if_t<detail::provides_data_interface<E, ST>::value, pointer> data() noexcept;
+        template <class E = xexpression_type, class ST = storage_type>
+        std::enable_if_t<detail::provides_data_interface<E, ST>::value, const_pointer> data() const noexcept;
+        size_type data_offset() const noexcept;
+
+        xexpression_type& expression() noexcept;
+        const xexpression_type& expression() const noexcept;
+
+        // Broadcasting support
+        template <class O>
+        bool broadcast_shape(O& shape, bool reuse_cache = false) const;
+
+        template <class O>
+        bool has_linear_assign(const O& strides) const noexcept;
+
+    protected:
+
+        using offset_type = typename strides_type::value_type;
+
+        // Index computation helpers shared by the access operators.
+        template <class... Args>
+        offset_type compute_index(Args... args) const;
+
+        template <class... Args>
+        offset_type compute_unchecked_index(Args... args) const;
+
+        template <class It>
+        offset_type compute_element_index(It first, It last) const;
+
+        void set_offset(size_type offset);
+
+    private:
+
+        undecay_expression m_e;
+        inner_storage_type m_storage;
+        inner_shape_type m_shape;
+        inner_strides_type m_strides;
+        inner_backstrides_type m_backstrides;
+        size_type m_offset;
+        layout_type m_layout;
+    };
+
+    /***************************
+     * flat_expression_adaptor *
+     ***************************/
+
+    namespace detail
+    {
+        // Storage getter used when the expression provides a data interface:
+        // forwards the expression's own storage(), data_offset() and
+        // strides().
+        template <class CT>
+        struct inner_storage_getter
+        {
+            using type = decltype(std::declval<CT>().storage());
+            using reference = std::add_lvalue_reference_t<CT>;
+
+            template <class E>
+            using rebind_t = inner_storage_getter<E>;
+
+            static decltype(auto) get_flat_storage(reference e)
+            {
+                return e.storage();
+            }
+
+            static auto get_offset(reference e)
+            {
+                return e.data_offset();
+            }
+
+            static decltype(auto) get_strides(reference e)
+            {
+                return e.strides();
+            }
+        };
+
+        // Storage getter used when the expression has no data interface:
+        // wraps it in a flat_expression_adaptor; offset is always 0 and the
+        // strides are computed from the shape for layout L.
+        template <class CT, layout_type L>
+        struct flat_adaptor_getter
+        {
+            using type = flat_expression_adaptor<std::remove_reference_t<CT>, L>;
+            using reference = std::add_lvalue_reference_t<CT>;
+
+            template <class E>
+            using rebind_t = flat_adaptor_getter<E, L>;
+
+            static type get_flat_storage(reference e)
+            {
+                // moved to addressof because ampersand on xview returns a closure pointer
+                return type(std::addressof(e));
+            }
+
+            static auto get_offset(reference)
+            {
+                return typename std::decay_t<CT>::size_type(0);
+            }
+
+            static auto get_strides(reference e)
+            {
+                dynamic_shape<std::ptrdiff_t> strides;
+                strides.resize(e.shape().size());
+                compute_strides(e.shape(), L, strides);
+                return strides;
+            }
+        };
+
+        // Selects the appropriate getter depending on whether CT exposes a
+        // data interface.
+        template <class CT, layout_type L>
+        using flat_storage_getter = std::conditional_t<
+            has_data_interface<std::decay_t<CT>>::value,
+            inner_storage_getter<CT>,
+            flat_adaptor_getter<CT, L>>;
+
+        template <layout_type L, class E>
+        inline auto get_offset(E& e)
+        {
+            return flat_storage_getter<E, L>::get_offset(e);
+        }
+
+        template <layout_type L, class E>
+        inline decltype(auto) get_strides(E& e)
+        {
+            return flat_storage_getter<E, L>::get_strides(e);
+        }
+    }
+
+    /*************************************
+     * xstrided_view_base implementation *
+     *************************************/
+
+    /**
+     * @name Constructor
+     */
+    //@{
+    /**
+     * Constructs an xstrided_view_base
+     *
+     * @param e the underlying xexpression for this view
+     * @param shape the shape of the view
+     * @param strides the strides of the view
+     * @param offset the offset of the first element in the underlying container
+     * @param layout the layout of the view
+     */
+    template <class D>
+    template <class CTA, class SA>
+    inline xstrided_view_base<D>::xstrided_view_base(
+        CTA&& e,
+        SA&& shape,
+        strides_type&& strides,
+        size_type offset,
+        layout_type layout
+    ) noexcept
+        : m_e(std::forward<CTA>(e))
+        ,
+        // m_storage(detail::get_flat_storage<undecay_expression>(m_e)),
+        m_storage(storage_getter::get_flat_storage(m_e))
+        , m_shape(std::forward<SA>(shape))
+        , m_strides(std::move(strides))
+        , m_offset(offset)
+        , m_layout(layout)
+    {
+        // Backstrides are derived from the shape and strides after the
+        // members are in place.
+        m_backstrides = xtl::make_sequence<backstrides_type>(m_shape.size(), 0);
+        adapt_strides(m_shape, m_strides, m_backstrides);
+    }
+
+    namespace detail
+    {
+        // Generic case: the storage is owned by the (already copied/moved)
+        // expression itself, so just re-fetch it.
+        template <class T, class S>
+        auto& copy_move_storage(T& expr, const S& /*storage*/)
+        {
+            return expr.storage();
+        }
+
+        // Adaptor case: the copied adaptor still points at the source
+        // expression and must be re-pointed at the new owner's expression.
+        template <class T, class E, layout_type L>
+        auto copy_move_storage(T& expr, const detail::flat_expression_adaptor<E, L>& storage)
+        {
+            detail::flat_expression_adaptor<E, L> new_storage = storage;  // copy storage
+            new_storage.update_pointer(std::addressof(expr));
+            return new_storage;
+        }
+    }
+
+    // Move constructor. m_storage cannot simply be moved: when it is a
+    // flat_expression_adaptor it references rhs.m_e, so copy_move_storage is used
+    // to rebuild it against this object's m_e (see detail::copy_move_storage above).
+    template <class D>
+    inline xstrided_view_base<D>::xstrided_view_base(xstrided_view_base&& rhs)
+        : base_type(std::move(rhs))
+        , m_e(std::forward<undecay_expression>(rhs.m_e))
+        , m_storage(detail::copy_move_storage(m_e, rhs.m_storage))
+        , m_shape(std::move(rhs.m_shape))
+        , m_strides(std::move(rhs.m_strides))
+        , m_backstrides(std::move(rhs.m_backstrides))
+        , m_offset(std::move(rhs.m_offset))
+        , m_layout(std::move(rhs.m_layout))
+    {
+    }
+
+    // Copy constructor. As for the move constructor, m_storage is rebuilt through
+    // copy_move_storage so that an adaptor-based storage points at this copy's m_e.
+    template <class D>
+    inline xstrided_view_base<D>::xstrided_view_base(const xstrided_view_base& rhs)
+        : base_type(rhs)
+        , m_e(rhs.m_e)
+        , m_storage(detail::copy_move_storage(m_e, rhs.m_storage))
+        , m_shape(rhs.m_shape)
+        , m_strides(rhs.m_strides)
+        , m_backstrides(rhs.m_backstrides)
+        , m_offset(rhs.m_offset)
+        , m_layout(rhs.m_layout)
+    {
+    }
+
+    //@}
+
+    /**
+     * @name Size and shape
+     */
+    //@{
+    /**
+     * Returns the shape of the xstrided_view_base.
+     */
+    template <class D>
+    inline auto xstrided_view_base<D>::shape() const noexcept -> const inner_shape_type&
+    {
+        return m_shape;
+    }
+
+    /**
+     * Returns the strides of the xstrided_view_base.
+     */
+    template <class D>
+    inline auto xstrided_view_base<D>::strides() const noexcept -> const inner_strides_type&
+    {
+        return m_strides;
+    }
+
+    /**
+     * Returns the backstrides of the xstrided_view_base.
+     */
+    template <class D>
+    inline auto xstrided_view_base<D>::backstrides() const noexcept -> const inner_backstrides_type&
+    {
+        return m_backstrides;
+    }
+
+    /**
+     * Returns the layout of the xstrided_view_base.
+     */
+    template <class D>
+    inline auto xstrided_view_base<D>::layout() const noexcept -> layout_type
+    {
+        return m_layout;
+    }
+
+    // A view is contiguous only when its layout is statically known (not dynamic)
+    // and the underlying expression is itself contiguous.
+    template <class D>
+    inline bool xstrided_view_base<D>::is_contiguous() const noexcept
+    {
+        return m_layout != layout_type::dynamic && m_e.is_contiguous();
+    }
+
+    //@}
+
+    /**
+     * @name Data
+     */
+    //@{
+    // Zero-argument access: returns the element at the view's offset in the flat
+    // storage, i.e. the element at index (0, ..., 0).
+    template <class D>
+    inline auto xstrided_view_base<D>::operator()() -> reference
+    {
+        return m_storage[static_cast<size_type>(m_offset)];
+    }
+
+    // Const overload of the zero-argument access: element at the view's offset.
+    template <class D>
+    inline auto xstrided_view_base<D>::operator()() const -> const_reference
+    {
+        return m_storage[static_cast<size_type>(m_offset)];
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the view.
+     * @param args a list of indices specifying the position in the view. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the view.
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xstrided_view_base<D>::operator()(Args... args) -> reference
+    {
+        XTENSOR_TRY(check_index(shape(), args...));
+        XTENSOR_CHECK_DIMENSION(shape(), args...);
+        // index = m_offset + dot(strides, args) -- see compute_index below.
+        offset_type index = compute_index(args...);
+        return m_storage[static_cast<size_type>(index)];
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the view.
+     * @param args a list of indices specifying the position in the view. Indices
+     * must be unsigned integers, the number of indices should be equal or greater than
+     * the number of dimensions of the view.
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xstrided_view_base<D>::operator()(Args... args) const -> const_reference
+    {
+        XTENSOR_TRY(check_index(shape(), args...));
+        XTENSOR_CHECK_DIMENSION(shape(), args...);
+        // index = m_offset + dot(strides, args) -- see compute_index below.
+        offset_type index = compute_index(args...);
+        return m_storage[static_cast<size_type>(index)];
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the view.
+     * @param args a list of indices specifying the position in the view. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the view, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xstrided_view_base<D>::unchecked(Args... args) -> reference
+    {
+        offset_type index = compute_unchecked_index(args...);
+        return m_storage[static_cast<size_type>(index)];
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the view.
+     * @param args a list of indices specifying the position in the view. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the view, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class D>
+    template <class... Args>
+    inline auto xstrided_view_base<D>::unchecked(Args... args) const -> const_reference
+    {
+        offset_type index = compute_unchecked_index(args...);
+        return m_storage[static_cast<size_type>(index)];
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the view.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater than the number
+     * of dimensions of the view.
+     */
+    template <class D>
+    template <class It>
+    inline auto xstrided_view_base<D>::element(It first, It last) -> reference
+    {
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        return m_storage[static_cast<size_type>(compute_element_index(first, last))];
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the view.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     * The number of indices in the sequence should be equal to or greater than the number
+     * of dimensions of the view.
+     */
+    template <class D>
+    template <class It>
+    inline auto xstrided_view_base<D>::element(It first, It last) const -> const_reference
+    {
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        return m_storage[static_cast<size_type>(compute_element_index(first, last))];
+    }
+
+    /**
+     * Returns a reference to the buffer containing the elements of the view.
+     * Note: this may be a detail::flat_expression_adaptor rather than a raw
+     * container when the underlying expression has no flat data interface.
+     */
+    template <class D>
+    inline auto xstrided_view_base<D>::storage() noexcept -> storage_type&
+    {
+        return m_storage;
+    }
+
+    /**
+     * Returns a constant reference to the buffer containing the elements of the view.
+     */
+    template <class D>
+    inline auto xstrided_view_base<D>::storage() const noexcept -> const storage_type&
+    {
+        return m_storage;
+    }
+
+    /**
+     * Returns a pointer to the underlying array serving as element storage.
+     * The first element of the view is at data() + data_offset().
+     * Only available when the underlying expression provides a data interface
+     * (enforced by the enable_if on detail::provides_data_interface).
+     */
+    template <class D>
+    template <class E, class ST>
+    inline auto xstrided_view_base<D>::data() noexcept
+        -> std::enable_if_t<detail::provides_data_interface<E, ST>::value, pointer>
+    {
+        return m_e.data();
+    }
+
+    /**
+     * Returns a constant pointer to the underlying array serving as element storage.
+     * The first element of the view is at data() + data_offset().
+     * Only available when the underlying expression provides a data interface.
+     */
+    template <class D>
+    template <class E, class ST>
+    inline auto xstrided_view_base<D>::data() const noexcept
+        -> std::enable_if_t<detail::provides_data_interface<E, ST>::value, const_pointer>
+    {
+        return m_e.data();
+    }
+
+    /**
+     * Returns the offset to the first element in the view, relative to the
+     * beginning of the underlying storage (see data()).
+     */
+    template <class D>
+    inline auto xstrided_view_base<D>::data_offset() const noexcept -> size_type
+    {
+        return m_offset;
+    }
+
+    /**
+     * Returns a reference to the underlying expression of the view.
+     */
+    template <class D>
+    inline auto xstrided_view_base<D>::expression() noexcept -> xexpression_type&
+    {
+        return m_e;
+    }
+
+    /**
+     * Returns a constant reference to the underlying expression of the view.
+     */
+    template <class D>
+    inline auto xstrided_view_base<D>::expression() const noexcept -> const xexpression_type&
+    {
+        return m_e;
+    }
+
+    //@}
+
+    /**
+     * @name Broadcasting
+     */
+    //@{
+    /**
+     * Broadcast the shape of the view to the specified parameter.
+     * @param shape the result shape
+     * @param reuse_cache parameter for internal optimization (unused here)
+     * @return a boolean indicating whether the broadcasting is trivial
+     */
+    template <class D>
+    template <class O>
+    inline bool xstrided_view_base<D>::broadcast_shape(O& shape, bool) const
+    {
+        // Delegates to the free function xt::broadcast_shape on the view's own shape.
+        return xt::broadcast_shape(m_shape, shape);
+    }
+
+    /**
+     * Checks whether the xstrided_view_base can be linearly assigned to an expression
+     * with the specified strides.
+     * @return a boolean indicating whether a linear assign is possible
+     */
+    template <class D>
+    template <class O>
+    inline bool xstrided_view_base<D>::has_linear_assign(const O& str) const noexcept
+    {
+        // Linear assignment requires a flat data interface on the underlying
+        // expression and strides identical to the target's.
+        return has_data_interface<xexpression_type>::value && str.size() == strides().size()
+               && std::equal(str.cbegin(), str.cend(), strides().begin());
+    }
+
+    //@}
+
+    // Flat index for a (checked) multi-dimensional index: view offset plus the
+    // strided offset of the arguments.
+    template <class D>
+    template <class... Args>
+    inline auto xstrided_view_base<D>::compute_index(Args... args) const -> offset_type
+    {
+        return static_cast<offset_type>(m_offset)
+               + xt::data_offset<offset_type>(strides(), static_cast<offset_type>(args)...);
+    }
+
+    // Flat index for unchecked access: same as compute_index but using the
+    // unchecked (no padding/bounds handling) offset computation.
+    template <class D>
+    template <class... Args>
+    inline auto xstrided_view_base<D>::compute_unchecked_index(Args... args) const -> offset_type
+    {
+        return static_cast<offset_type>(m_offset)
+               + xt::unchecked_data_offset<offset_type>(strides(), static_cast<offset_type>(args)...);
+    }
+
+    // Flat index for an index given as an iterator range.
+    template <class D>
+    template <class It>
+    inline auto xstrided_view_base<D>::compute_element_index(It first, It last) const -> offset_type
+    {
+        return static_cast<offset_type>(m_offset) + xt::element_offset<offset_type>(strides(), first, last);
+    }
+
+    // Replaces the view's offset into the underlying storage.
+    template <class D>
+    void xstrided_view_base<D>::set_offset(size_type offset)
+    {
+        m_offset = offset;
+    }
+
+    /******************************************
+     * flat_expression_adaptor implementation *
+     ******************************************/
+
+    namespace detail
+    {
+        // flat_expression_adaptor exposes an arbitrary expression through a flat,
+        // random-access, container-like interface: operator[] unravels the flat
+        // index into a full multi-dimensional index and calls element().
+        template <class CT, layout_type L>
+        inline flat_expression_adaptor<CT, L>::flat_expression_adaptor(CT* e)
+            : m_e(e)
+        {
+            // No strides given: compute them from the expression's shape for layout L.
+            resize_container(get_index(), m_e->dimension());
+            resize_container(m_strides, m_e->dimension());
+            m_size = compute_size(m_e->shape());
+            compute_strides(m_e->shape(), L, m_strides);
+        }
+
+        template <class CT, layout_type L>
+        template <class FST>
+        inline flat_expression_adaptor<CT, L>::flat_expression_adaptor(CT* e, FST&& strides)
+            : m_e(e)
+            , m_strides(xtl::forward_sequence<inner_strides_type, FST>(strides))
+        {
+            resize_container(get_index(), m_e->dimension());
+            m_size = m_e->size();
+        }
+
+        // Re-targets the adaptor at a new expression after its owner was
+        // copied/moved (see copy_move_storage). Declared const yet reassigns
+        // m_e -- m_e is presumably a mutable member; confirm in the class definition.
+        template <class CT, layout_type L>
+        inline void flat_expression_adaptor<CT, L>::update_pointer(CT* ptr) const
+        {
+            m_e = ptr;
+        }
+
+        template <class CT, layout_type L>
+        inline auto flat_expression_adaptor<CT, L>::size() const -> size_type
+        {
+            return m_size;
+        }
+
+        // Unravel the flat index according to the stored strides and layout L,
+        // then delegate to the expression's element() access.
+        template <class CT, layout_type L>
+        inline auto flat_expression_adaptor<CT, L>::operator[](size_type idx) -> reference
+        {
+            auto i = static_cast<typename index_type::value_type>(idx);
+            get_index() = detail::unravel_noexcept(i, m_strides, L);
+            return m_e->element(get_index().cbegin(), get_index().cend());
+        }
+
+        template <class CT, layout_type L>
+        inline auto flat_expression_adaptor<CT, L>::operator[](size_type idx) const -> const_reference
+        {
+            auto i = static_cast<typename index_type::value_type>(idx);
+            get_index() = detail::unravel_noexcept(i, m_strides, L);
+            return m_e->element(get_index().cbegin(), get_index().cend());
+        }
+
+        // Iteration simply forwards to the expression's layout-L iterators.
+        template <class CT, layout_type L>
+        inline auto flat_expression_adaptor<CT, L>::begin() -> iterator
+        {
+            return m_e->template begin<L>();
+        }
+
+        template <class CT, layout_type L>
+        inline auto flat_expression_adaptor<CT, L>::end() -> iterator
+        {
+            return m_e->template end<L>();
+        }
+
+        template <class CT, layout_type L>
+        inline auto flat_expression_adaptor<CT, L>::begin() const -> const_iterator
+        {
+            return m_e->template cbegin<L>();
+        }
+
+        template <class CT, layout_type L>
+        inline auto flat_expression_adaptor<CT, L>::end() const -> const_iterator
+        {
+            return m_e->template cend<L>();
+        }
+
+        template <class CT, layout_type L>
+        inline auto flat_expression_adaptor<CT, L>::cbegin() const -> const_iterator
+        {
+            return m_e->template cbegin<L>();
+        }
+
+        template <class CT, layout_type L>
+        inline auto flat_expression_adaptor<CT, L>::cend() const -> const_iterator
+        {
+            return m_e->template cend<L>();
+        }
+
+        // Per-thread scratch buffer for the unravelled index, shared by all
+        // adaptors of this instantiation; avoids allocating an index per access.
+        template <class CT, layout_type L>
+        inline auto flat_expression_adaptor<CT, L>::get_index() -> index_type&
+        {
+            thread_local static index_type index;
+            return index;
+        }
+    }
+
+    /**********************************
+     * Builder helpers implementation *
+     **********************************/
+
+    namespace detail
+    {
+        // Visitor for the slice variant: maps each slice kind to a
+        // {start, size, step} triple of ptrdiff_t. idx selects which axis of
+        // m_shape the slice applies to (set by the caller before visiting).
+        template <class S>
+        struct slice_getter_impl
+        {
+            const S& m_shape;
+            mutable std::size_t idx;
+            using array_type = std::array<std::ptrdiff_t, 3>;
+
+            explicit slice_getter_impl(const S& shape)
+                : m_shape(shape)
+                , idx(0)
+            {
+            }
+
+            // Fallback for slice kinds handled elsewhere (all/newaxis/ellipsis/integer).
+            template <class T>
+            array_type operator()(const T& /*t*/) const
+            {
+                return array_type{{0, 0, 0}};
+            }
+
+            template <class A, class B, class C>
+            array_type operator()(const xrange_adaptor<A, B, C>& range) const
+            {
+                auto sl = range.get(static_cast<std::size_t>(m_shape[idx]));
+                return array_type({sl(0), sl.size(), sl.step_size()});
+            }
+
+            template <class T>
+            array_type operator()(const xrange<T>& range) const
+            {
+                return array_type({range(T(0)), range.size(), T(1)});
+            }
+
+            template <class T>
+            array_type operator()(const xstepped_range<T>& range) const
+            {
+                return array_type({range(T(0)), range.size(), range.step_size(T(0))});
+            }
+        };
+
+        // Computes the shape, strides, offset and layout of a strided view from
+        // the original shape/strides and a list of slices.
+        template <class adj_strides_policy>
+        struct strided_view_args : adj_strides_policy
+        {
+            using base_type = adj_strides_policy;
+
+            template <class S, class ST, class V>
+            void
+            fill_args(const S& shape, ST&& old_strides, std::size_t base_offset, layout_type layout, const V& slices)
+            {
+                // Compute dimension: newaxis adds a dimension, an integer slice
+                // removes one; dimension_check counts consumed underlying axes.
+                std::size_t dimension = shape.size(), n_newaxis = 0, n_add_all = 0;
+                std::ptrdiff_t dimension_check = static_cast<std::ptrdiff_t>(shape.size());
+
+                bool has_ellipsis = false;
+                for (const auto& el : slices)
+                {
+                    if (xtl::get_if<xt::xnewaxis_tag>(&el) != nullptr)
+                    {
+                        ++dimension;
+                        ++n_newaxis;
+                    }
+                    else if (xtl::get_if<std::ptrdiff_t>(&el) != nullptr)
+                    {
+                        --dimension;
+                        --dimension_check;
+                    }
+                    else if (xtl::get_if<xt::xellipsis_tag>(&el) != nullptr)
+                    {
+                        if (has_ellipsis == true)
+                        {
+                            XTENSOR_THROW(std::runtime_error, "Ellipsis can only appear once.");
+                        }
+                        has_ellipsis = true;
+                    }
+                    else
+                    {
+                        --dimension_check;
+                    }
+                }
+
+                if (dimension_check < 0)
+                {
+                    XTENSOR_THROW(std::runtime_error, "Too many slices for view.");
+                }
+
+                if (has_ellipsis)
+                {
+                    // replace ellipsis with N * xt::all
+                    // remove -1 because of the ellipsis slice itself
+                    n_add_all = shape.size() - (slices.size() - 1 - n_newaxis);
+                }
+
+                // Compute strided view
+                new_offset = base_offset;
+                new_shape.resize(dimension);
+                new_strides.resize(dimension);
+                base_type::resize(dimension);
+
+                auto old_shape = shape;
+                using old_strides_value_type = typename std::decay_t<ST>::value_type;
+
+                // axis_skip is the difference between the slice index i and the
+                // underlying axis index i_ax (newaxis/ellipsis do not consume axes).
+                std::ptrdiff_t axis_skip = 0;
+                std::size_t idx = 0, i = 0, i_ax = 0;
+
+                auto slice_getter = detail::slice_getter_impl<S>(shape);
+
+                for (; i < slices.size(); ++i)
+                {
+                    i_ax = static_cast<std::size_t>(static_cast<std::ptrdiff_t>(i) - axis_skip);
+                    auto ptr = xtl::get_if<std::ptrdiff_t>(&slices[i]);
+                    if (ptr != nullptr)
+                    {
+                        // Integer slice: drop the axis, fold its contribution into the offset.
+                        auto slice0 = static_cast<old_strides_value_type>(*ptr);
+                        new_offset += static_cast<std::size_t>(slice0 * old_strides[i_ax]);
+                    }
+                    else if (xtl::get_if<xt::xnewaxis_tag>(&slices[i]) != nullptr)
+                    {
+                        // newaxis: insert a size-1 axis that consumes no underlying axis.
+                        new_shape[idx] = 1;
+                        base_type::set_fake_slice(idx);
+                        ++axis_skip, ++idx;
+                    }
+                    else if (xtl::get_if<xt::xellipsis_tag>(&slices[i]) != nullptr)
+                    {
+                        // ellipsis: expand into n_add_all full-axis (xt::all) slices.
+                        for (std::size_t j = 0; j < n_add_all; ++j)
+                        {
+                            new_shape[idx] = old_shape[i_ax];
+                            new_strides[idx] = old_strides[i_ax];
+                            base_type::set_fake_slice(idx);
+                            ++idx, ++i_ax;
+                        }
+                        axis_skip = axis_skip - static_cast<std::ptrdiff_t>(n_add_all) + 1;
+                    }
+                    else if (xtl::get_if<xt::xall_tag>(&slices[i]) != nullptr)
+                    {
+                        // all: keep the axis unchanged.
+                        new_shape[idx] = old_shape[i_ax];
+                        new_strides[idx] = old_strides[i_ax];
+                        base_type::set_fake_slice(idx);
+                        ++idx;
+                    }
+                    else if (base_type::fill_args(slices, i, idx, old_shape[i_ax], old_strides[i_ax], new_shape, new_strides))
+                    {
+                        // Policy-specific slices (handled by adj_strides_policy).
+                        ++idx;
+                    }
+                    else
+                    {
+                        // Range-like slice: get {start, size, step} and adjust
+                        // offset, shape and strides accordingly.
+                        slice_getter.idx = i_ax;
+                        auto info = xtl::visit(slice_getter, slices[i]);
+                        new_offset += static_cast<std::size_t>(info[0] * old_strides[i_ax]);
+                        new_shape[idx] = static_cast<std::size_t>(info[1]);
+                        new_strides[idx] = info[2] * old_strides[i_ax];
+                        base_type::set_fake_slice(idx);
+                        ++idx;
+                    }
+                }
+
+                // Remaining (unsliced) trailing axes are kept as-is.
+                i_ax = static_cast<std::size_t>(static_cast<std::ptrdiff_t>(i) - axis_skip);
+                for (; i_ax < old_shape.size(); ++i_ax, ++idx)
+                {
+                    new_shape[idx] = old_shape[i_ax];
+                    new_strides[idx] = old_strides[i_ax];
+                    base_type::set_fake_slice(idx);
+                }
+
+                new_layout = do_strides_match(new_shape, new_strides, layout, true) ? layout
+                                                                                    : layout_type::dynamic;
+            }
+
+            using shape_type = dynamic_shape<std::size_t>;
+            shape_type new_shape;
+            using strides_type = dynamic_shape<std::ptrdiff_t>;
+            strides_type new_strides;
+            std::size_t new_offset;
+            layout_type new_layout;
+        };
+    }
+}
+
+#endif

+ 916 - 0
3rd/numpy/include/xtensor/xstrides.hpp

@@ -0,0 +1,916 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_STRIDES_HPP
+#define XTENSOR_STRIDES_HPP
+
+#include <cstddef>
+#include <functional>
+#include <limits>
+#include <numeric>
+
+#include <xtl/xsequence.hpp>
+
+#include "xexception.hpp"
+#include "xshape.hpp"
+#include "xtensor_config.hpp"
+#include "xtensor_forward.hpp"
+
+namespace xt
+{
+
+    template <class shape_type>
+    std::size_t compute_size(const shape_type& shape) noexcept;
+
+    /**
+     * @defgroup xt_xstrides Support functions to switch between array indices and flat indices
+     */
+
+    /***************
+     * data offset *
+     ***************/
+
+    template <class offset_type, class S>
+    offset_type data_offset(const S& strides) noexcept;
+
+    /**
+     * @brief Return the flat index for an array index.
+     *
+     * Given ``m`` arguments, and dimension ``n`` of the array (``n == strides.size()``).
+     *
+     *  -   If ``m == n``, the index is
+     *      ``strides[0] * index[0] + ... + strides[n - 1] * index[n - 1]``.
+     *
+     *  -   If ``m < n`` and the last argument is ``xt::missing`` the indices are zero-padded at
+     *      the end to match the dimension of the array. The index is then
+     *      ``strides[0] * index[0] + ... + strides[m - 1] * index[m - 1]``.
+     *
+     *  -   If ``m < n`` (and the last argument is not ``xt::missing``), the index is
+     *      ``strides[n - m - 1] * index[0] + ... + strides[n - 1] * index[m - 1]``.
+     *
+     *  -   If ``m > n``, then the first ``m - n`` arguments are ignored. The index is then
+     *      ``strides[0] * index[m - n] + ... + strides[n - 1] * index[m - 1]``.
+     *
+     * @ingroup xt_xstrides
+     * @param strides Strides of the array.
+     * @param args Array index.
+     * @return The flat index.
+     */
+    template <class offset_type, class S, class Arg, class... Args>
+    offset_type data_offset(const S& strides, Arg arg, Args... args) noexcept;
+
+    template <class offset_type, layout_type L = layout_type::dynamic, class S, class... Args>
+    offset_type unchecked_data_offset(const S& strides, Args... args) noexcept;
+
+    template <class offset_type, class S, class It>
+    offset_type element_offset(const S& strides, It first, It last) noexcept;
+
+    /*******************
+     * strides builder *
+     *******************/
+
+    /**
+     * @brief Compute the strides given the shape and the layout of an array.
+     *
+     * @ingroup xt_xstrides
+     * @param shape Shape of the array.
+     * @param l Layout type, see xt::layout_type().
+     * @param strides (output) Strides of the array.
+     * @return The size: the product of the shape.
+     */
+    template <layout_type L = layout_type::dynamic, class shape_type, class strides_type>
+    std::size_t compute_strides(const shape_type& shape, layout_type l, strides_type& strides);
+
+    template <layout_type L = layout_type::dynamic, class shape_type, class strides_type, class backstrides_type>
+    std::size_t
+    compute_strides(const shape_type& shape, layout_type l, strides_type& strides, backstrides_type& backstrides);
+
+    template <class shape_type, class strides_type>
+    void adapt_strides(const shape_type& shape, strides_type& strides) noexcept;
+
+    template <class shape_type, class strides_type, class backstrides_type>
+    void adapt_strides(const shape_type& shape, strides_type& strides, backstrides_type& backstrides) noexcept;
+
+    /*****************
+     * unravel_index *
+     *****************/
+
+    template <class S>
+    S unravel_from_strides(typename S::value_type index, const S& strides, layout_type l = layout_type::row_major);
+
+    template <class S>
+    get_strides_t<S>
+    unravel_index(typename S::value_type index, const S& shape, layout_type l = layout_type::row_major);
+
+    template <class S, class T>
+    std::vector<get_strides_t<S>>
+    unravel_indices(const T& indices, const S& shape, layout_type l = layout_type::row_major);
+
+    /***********************
+     * broadcast functions *
+     ***********************/
+
+    template <class S, class size_type>
+    S uninitialized_shape(size_type size);
+
+    template <class S1, class S2>
+    bool broadcast_shape(const S1& input, S2& output);
+
+    template <class S1, class S2>
+    bool broadcastable(const S1& s1, S2& s2);
+
+    /*************************
+     * check strides overlap *
+     *************************/
+
+    template <layout_type L>
+    struct check_strides_overlap;
+
+    /**********************************
+     * check bounds, without throwing *
+     **********************************/
+
+    /**
+     * @brief Check if the index is within the bounds of the array.
+     *
+     * @param shape Shape of the array.
+     * @param args Array index.
+     * @return true If the index is within the bounds of the array.
+     * @return false Otherwise.
+     */
+    template <class S, class... Args>
+    bool in_bounds(const S& shape, Args&... args);
+
+    /********************************
+     * apply periodicity to indices *
+     *******************************/
+
+    /**
+     * @brief Normalise an index of a periodic array.
+     * For example if the shape is ``(3, 4)`` and the index is ``(3, -4)`` the result is ``(0, 0)``.
+     *
+     * @ingroup xt_xstrides
+     * @param shape Shape of the array.
+     * @param args (input/output) Array index.
+     */
+    template <class S, class... Args>
+    void normalize_periodic(const S& shape, Args&... args);
+
+    /********************************************
+     * utility functions for strided containers *
+     ********************************************/
+
+    // Returns the one-past-the-end iterator of a strided container's data:
+    // advances `begin` to the last element (sum over axes of stride * (extent - 1)),
+    // then steps one element further according to the layout.
+    template <class C, class It, class size_type>
+    It strided_data_end(const C& c, It begin, layout_type l, size_type offset)
+    {
+        using difference_type = typename std::iterator_traits<It>::difference_type;
+        if (c.dimension() == 0)
+        {
+            // 0-d container: a single element, end is simply begin + 1.
+            ++begin;
+        }
+        else
+        {
+            for (std::size_t i = 0; i != c.dimension(); ++i)
+            {
+                begin += c.strides()[i] * difference_type(c.shape()[i] - 1);
+            }
+            if (l == layout_type::row_major)
+            {
+                begin += c.strides().back();
+            }
+            else
+            {
+                // Column-major: the final step is skipped for a non-zero offset
+                // -- NOTE(review): presumably to account for view offsets; confirm
+                // against the callers of this helper.
+                if (offset == 0)
+                {
+                    begin += c.strides().front();
+                }
+            }
+        }
+        return begin;
+    }
+
+    /***********
+     * strides *
+     ***********/
+
+    namespace detail
+    {
+        // Recomputes the "natural" stride of `axis` from the shape:
+        // - row major: product of the extents after `axis`;
+        // - column major: product of the extents before `axis`;
+        // - any other layout: fall back to `default_stride`.
+        template <class return_type, class S, class T, class D>
+        inline return_type compute_stride_impl(layout_type layout, const S& shape, T axis, D default_stride)
+        {
+            if (layout == layout_type::row_major)
+            {
+                return std::accumulate(
+                    shape.cbegin() + axis + 1,
+                    shape.cend(),
+                    static_cast<return_type>(1),
+                    std::multiplies<return_type>()
+                );
+            }
+            if (layout == layout_type::column_major)
+            {
+                return std::accumulate(
+                    shape.cbegin(),
+                    shape.cbegin() + axis,
+                    static_cast<return_type>(1),
+                    std::multiplies<return_type>()
+                );
+            }
+            return default_stride;
+        }
+    }
+
+    /**
+     * @brief Choose the stride convention reported by xt::strides().
+     * @ingroup xt_xstrides
+     */
+    enum class stride_type
+    {
+        internal = 0,  ///< As used internally (with `stride(axis) == 0` if `shape(axis) == 1`)
+        normal = 1,    ///< Normal stride corresponding to storage.
+        bytes = 2,     ///< Normal stride in bytes.
+    };
+
+    /**
+     * @brief Get strides of an object.
+     *
+     * @ingroup xt_xstrides
+     * @param e an expression (array-like object providing strides(), shape(), layout())
+     * @param type the stride convention to report, see xt::stride_type
+     * @return the strides, converted according to @p type
+     */
+    template <class E>
+    inline auto strides(const E& e, stride_type type = stride_type::normal) noexcept
+    {
+        using strides_type = typename E::strides_type;
+        using return_type = typename strides_type::value_type;
+        strides_type ret = e.strides();
+        auto shape = e.shape();
+
+        // internal: return the raw strides untouched.
+        if (type == stride_type::internal)
+        {
+            return ret;
+        }
+
+        // Recompute the natural stride of size-1 axes (stored as 0 internally).
+        for (std::size_t i = 0; i < ret.size(); ++i)
+        {
+            if (shape[i] == 1)
+            {
+                ret[i] = detail::compute_stride_impl<return_type>(e.layout(), shape, i, ret[i]);
+            }
+        }
+
+        // bytes: scale element strides by the element size.
+        if (type == stride_type::bytes)
+        {
+            return_type f = static_cast<return_type>(sizeof(typename E::value_type));
+            std::for_each(
+                ret.begin(),
+                ret.end(),
+                [f](auto& c)
+                {
+                    c *= f;
+                }
+            );
+        }
+
+        return ret;
+    }
+
+    /**
+     * @brief Get stride of an object along an axis.
+     *
+     * @ingroup xt_xstrides
+     * @param e an expression providing `strides()`, `shape()` and `layout()`
+     * @param axis the axis along which to query the stride
+     * @param type the kind of stride to return, see xt::stride_type
+     * @return integer stride along @p axis
+     */
+    template <class E>
+    inline auto strides(const E& e, std::size_t axis, stride_type type = stride_type::normal) noexcept
+    {
+        using strides_type = typename E::strides_type;
+        using return_type = typename strides_type::value_type;
+
+        return_type ret = e.strides()[axis];
+
+        if (type == stride_type::internal)
+        {
+            return ret;
+        }
+
+        // A stride of 0 on a singleton axis is an internal convention: restore
+        // the layout-implied stride for "normal"/"bytes" queries.
+        if (ret == 0)
+        {
+            if (e.shape(axis) == 1)
+            {
+                ret = detail::compute_stride_impl<return_type>(e.layout(), e.shape(), axis, ret);
+            }
+        }
+
+        if (type == stride_type::bytes)
+        {
+            return_type f = static_cast<return_type>(sizeof(typename E::value_type));
+            ret *= f;
+        }
+
+        return ret;
+    }
+
+    /******************
+     * Implementation *
+     ******************/
+
+    namespace detail
+    {
+        // Number of elements for a shape with a signed value_type: take the
+        // absolute value of the product so negative entries cannot yield a
+        // bogus huge size after the unsigned cast.
+        template <class shape_type>
+        inline std::size_t compute_size_impl(const shape_type& shape, std::true_type /* is signed */)
+        {
+            using size_type = std::decay_t<typename shape_type::value_type>;
+            return static_cast<std::size_t>(std::abs(
+                std::accumulate(shape.cbegin(), shape.cend(), size_type(1), std::multiplies<size_type>())
+            ));
+        }
+
+        // Number of elements for a shape with an unsigned value_type: plain
+        // product of all extents (empty shape yields 1).
+        template <class shape_type>
+        inline std::size_t compute_size_impl(const shape_type& shape, std::false_type /* is not signed */)
+        {
+            using size_type = std::decay_t<typename shape_type::value_type>;
+            return static_cast<std::size_t>(
+                std::accumulate(shape.cbegin(), shape.cend(), size_type(1), std::multiplies<size_type>())
+            );
+        }
+    }
+
+    /**
+     * @brief Compute the number of elements described by @p shape
+     * (product of all extents), dispatching on the signedness of the
+     * shape's value type.
+     */
+    template <class shape_type>
+    inline std::size_t compute_size(const shape_type& shape) noexcept
+    {
+        return detail::compute_size_impl(
+            shape,
+            xtl::is_signed<std::decay_t<typename std::decay_t<shape_type>::value_type>>()
+        );
+    }
+
+    namespace detail
+    {
+
+        // Terminal case: no indices left, contribution to the offset is 0.
+        template <std::size_t dim, class S>
+        inline auto raw_data_offset(const S&) noexcept
+        {
+            using strides_value_type = std::decay_t<decltype(std::declval<S>()[0])>;
+            return strides_value_type(0);
+        }
+
+        // Terminal case: a trailing xt::missing placeholder acts as index 0.
+        template <std::size_t dim, class S>
+        inline auto raw_data_offset(const S&, missing_type) noexcept
+        {
+            using strides_value_type = std::decay_t<decltype(std::declval<S>()[0])>;
+            return strides_value_type(0);
+        }
+
+        // Recursive case: accumulate arg * strides[dim] across the index pack.
+        template <std::size_t dim, class S, class Arg, class... Args>
+        inline auto raw_data_offset(const S& strides, Arg arg, Args... args) noexcept
+        {
+            return static_cast<std::ptrdiff_t>(arg) * strides[dim] + raw_data_offset<dim + 1>(strides, args...);
+        }
+
+        // Generic layout: no stride may be assumed trivial, fall back to the
+        // plain scalar product.
+        template <layout_type L, std::ptrdiff_t static_dim>
+        struct layout_data_offset
+        {
+            template <std::size_t dim, class S, class Arg, class... Args>
+            inline static auto run(const S& strides, Arg arg, Args... args) noexcept
+            {
+                return raw_data_offset<dim>(strides, arg, args...);
+            }
+        };
+
+        // Row-major specialization: when the current index is the last one of
+        // a container of static dimension static_dim, the multiplication by
+        // the (unit) innermost stride is skipped.
+        template <std::ptrdiff_t static_dim>
+        struct layout_data_offset<layout_type::row_major, static_dim>
+        {
+            using self_type = layout_data_offset<layout_type::row_major, static_dim>;
+
+            template <std::size_t dim, class S, class Arg>
+            inline static auto run(const S& strides, Arg arg) noexcept
+            {
+                if (std::ptrdiff_t(dim) + 1 == static_dim)
+                {
+                    return arg;
+                }
+                else
+                {
+                    return arg * strides[dim];
+                }
+            }
+
+            template <std::size_t dim, class S, class Arg, class... Args>
+            inline static auto run(const S& strides, Arg arg, Args... args) noexcept
+            {
+                return arg * strides[dim] + self_type::template run<dim + 1>(strides, args...);
+            }
+        };
+
+        // Column-major specialization: the multiplication is skipped for the
+        // first axis (dim == 0), whose stride is trivial in this layout.
+        template <std::ptrdiff_t static_dim>
+        struct layout_data_offset<layout_type::column_major, static_dim>
+        {
+            using self_type = layout_data_offset<layout_type::column_major, static_dim>;
+
+            template <std::size_t dim, class S, class Arg>
+            inline static auto run(const S& strides, Arg arg) noexcept
+            {
+                if (dim == 0)
+                {
+                    return arg;
+                }
+                else
+                {
+                    return arg * strides[dim];
+                }
+            }
+
+            template <std::size_t dim, class S, class Arg, class... Args>
+            inline static auto run(const S& strides, Arg arg, Args... args) noexcept
+            {
+                if (dim == 0)
+                {
+                    return arg + self_type::template run<dim + 1>(strides, args...);
+                }
+                else
+                {
+                    return arg * strides[dim] + self_type::template run<dim + 1>(strides, args...);
+                }
+            }
+        };
+    }
+
+    // Offset of the element addressed by no index at all: always 0.
+    template <class offset_type, class S>
+    inline offset_type data_offset(const S&) noexcept
+    {
+        return offset_type(0);
+    }
+
+    // Compute the storage offset of the element addressed by (arg, args...)
+    // given the strides, handling index packs shorter or longer than the
+    // number of dimensions (NumPy-style prefix dropping / postfix rules).
+    template <class offset_type, class S, class Arg, class... Args>
+    inline offset_type data_offset(const S& strides, Arg arg, Args... args) noexcept
+    {
+        constexpr std::size_t nargs = sizeof...(Args) + 1;
+        if (nargs == strides.size())
+        {
+            // Correct number of arguments: iterate
+            return static_cast<offset_type>(detail::raw_data_offset<0>(strides, arg, args...));
+        }
+        else if (nargs > strides.size())
+        {
+            // Too many arguments: drop the first
+            return data_offset<offset_type, S>(strides, args...);
+        }
+        else if (detail::last_type_is_missing<Args...>)
+        {
+            // Too few arguments & last argument xt::missing: postfix index with zeros
+            return static_cast<offset_type>(detail::raw_data_offset<0>(strides, arg, args...));
+        }
+        else
+        {
+            // Too few arguments: right to left scalar product
+            auto view = strides.cend() - nargs;
+            return static_cast<offset_type>(detail::raw_data_offset<0>(view, arg, args...));
+        }
+    }
+
+    // Offset computation without argument-count adjustments: the caller
+    // guarantees one index per dimension, allowing the layout-specialized
+    // (L, static dimension) fast path to be used.
+    template <class offset_type, layout_type L, class S, class... Args>
+    inline offset_type unchecked_data_offset(const S& strides, Args... args) noexcept
+    {
+        return static_cast<offset_type>(
+            detail::layout_data_offset<L, static_dimension<S>::value>::template run<0>(strides.cbegin(), args...)
+        );
+    }
+
+    // Offset of the element addressed by the index range [first, last):
+    // right-aligned inner product of the trailing min(#indices, #strides)
+    // indices with the trailing strides.
+    template <class offset_type, class S, class It>
+    inline offset_type element_offset(const S& strides, It first, It last) noexcept
+    {
+        using difference_type = typename std::iterator_traits<It>::difference_type;
+        auto size = static_cast<difference_type>(
+            (std::min)(static_cast<typename S::size_type>(std::distance(first, last)), strides.size())
+        );
+        return std::inner_product(last - size, last, strides.cend() - size, offset_type(0));
+    }
+
+    namespace detail
+    {
+        // Zero the stride of singleton axes (internal broadcasting convention)
+        // and record the matching backstride for axis i.
+        template <class shape_type, class strides_type, class bs_ptr>
+        inline void adapt_strides(
+            const shape_type& shape,
+            strides_type& strides,
+            bs_ptr backstrides,
+            typename strides_type::size_type i
+        ) noexcept
+        {
+            if (shape[i] == 1)
+            {
+                strides[i] = 0;
+            }
+            (*backstrides)[i] = strides[i] * std::ptrdiff_t(shape[i] - 1);
+        }
+
+        // Overload without backstrides: only zero singleton-axis strides.
+        template <class shape_type, class strides_type>
+        inline void adapt_strides(
+            const shape_type& shape,
+            strides_type& strides,
+            std::nullptr_t,
+            typename strides_type::size_type i
+        ) noexcept
+        {
+            if (shape[i] == 1)
+            {
+                strides[i] = 0;
+            }
+        }
+
+        // Fill strides (and optionally backstrides via bs) for the given
+        // shape and layout; returns the total number of elements.
+        // L is the compile-time layout, l the runtime one; row-major is taken
+        // if either says so, column-major otherwise.
+        template <layout_type L, class shape_type, class strides_type, class bs_ptr>
+        inline std::size_t
+        compute_strides(const shape_type& shape, layout_type l, strides_type& strides, bs_ptr bs)
+        {
+            using strides_value_type = typename std::decay_t<strides_type>::value_type;
+            strides_value_type data_size = 1;
+
+#if defined(_MSC_VER) && (1931 <= _MSC_VER)
+            // Workaround MSVC compiler optimization bug, xtensor#2568
+            if (0 == shape.size())
+            {
+                return static_cast<std::size_t>(data_size);
+            }
+#endif
+
+            if (L == layout_type::row_major || l == layout_type::row_major)
+            {
+                // Row-major: innermost (last) axis has the smallest stride.
+                for (std::size_t i = shape.size(); i != 0; --i)
+                {
+                    strides[i - 1] = data_size;
+                    data_size = strides[i - 1] * static_cast<strides_value_type>(shape[i - 1]);
+                    adapt_strides(shape, strides, bs, i - 1);
+                }
+            }
+            else
+            {
+                // Column-major: first axis has the smallest stride.
+                for (std::size_t i = 0; i < shape.size(); ++i)
+                {
+                    strides[i] = data_size;
+                    data_size = strides[i] * static_cast<strides_value_type>(shape[i]);
+                    adapt_strides(shape, strides, bs, i);
+                }
+            }
+            return static_cast<std::size_t>(data_size);
+        }
+    }
+
+    // Compute strides for @p shape and layout; returns the element count.
+    template <layout_type L, class shape_type, class strides_type>
+    inline std::size_t compute_strides(const shape_type& shape, layout_type l, strides_type& strides)
+    {
+        return detail::compute_strides<L>(shape, l, strides, nullptr);
+    }
+
+    // Same as above, additionally filling the backstrides.
+    template <layout_type L, class shape_type, class strides_type, class backstrides_type>
+    inline std::size_t
+    compute_strides(const shape_type& shape, layout_type l, strides_type& strides, backstrides_type& backstrides)
+    {
+        return detail::compute_strides<L>(shape, l, strides, &backstrides);
+    }
+
+    // True when a single (stride, extent) pair is consistent with a contiguous
+    // layout whose running element count is data_size; a zero stride on a
+    // singleton axis also matches when zero_strides is enabled.
+    template <class T1, class T2>
+    inline bool
+    stride_match_condition(const T1& stride, const T2& shape, const T1& data_size, bool zero_strides)
+    {
+        return (shape == T2(1) && stride == T1(0) && zero_strides) || (stride == data_size);
+    }
+
+    // Check whether the given strides describe a contiguous array of the
+    // given shape in layout l (row- or column-major; any other layout never
+    // matches). zero_strides should be true when strides are set to 0 if the
+    // corresponding dimensions are 1.
+    template <class shape_type, class strides_type>
+    inline bool
+    do_strides_match(const shape_type& shape, const strides_type& strides, layout_type l, bool zero_strides)
+    {
+        using value_type = typename strides_type::value_type;
+        value_type data_size = 1;
+        if (l == layout_type::row_major)
+        {
+            // Walk from the innermost (last) axis outwards.
+            for (std::size_t i = strides.size(); i != 0; --i)
+            {
+                if (!stride_match_condition(strides[i - 1], shape[i - 1], data_size, zero_strides))
+                {
+                    return false;
+                }
+                data_size *= static_cast<value_type>(shape[i - 1]);
+            }
+            return true;
+        }
+        else if (l == layout_type::column_major)
+        {
+            // Walk from the first axis inwards.
+            for (std::size_t i = 0; i < strides.size(); ++i)
+            {
+                if (!stride_match_condition(strides[i], shape[i], data_size, zero_strides))
+                {
+                    return false;
+                }
+                data_size *= static_cast<value_type>(shape[i]);
+            }
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    // Zero the strides of all singleton axes in place (internal convention).
+    template <class shape_type, class strides_type>
+    inline void adapt_strides(const shape_type& shape, strides_type& strides) noexcept
+    {
+        for (typename shape_type::size_type i = 0; i < shape.size(); ++i)
+        {
+            detail::adapt_strides(shape, strides, nullptr, i);
+        }
+    }
+
+    // Same as above, additionally filling the backstrides for each axis.
+    template <class shape_type, class strides_type, class backstrides_type>
+    inline void
+    adapt_strides(const shape_type& shape, strides_type& strides, backstrides_type& backstrides) noexcept
+    {
+        for (typename shape_type::size_type i = 0; i < shape.size(); ++i)
+        {
+            detail::adapt_strides(shape, strides, &backstrides, i);
+        }
+    }
+
+    namespace detail
+    {
+        // Convert a flat index to a multi-index by successive division by the
+        // strides, largest stride first (forward for row-major, backward for
+        // column-major). Zero strides (singleton axes) yield index 0.
+        template <class S>
+        inline S unravel_noexcept(typename S::value_type idx, const S& strides, layout_type l) noexcept
+        {
+            using value_type = typename S::value_type;
+            using size_type = typename S::size_type;
+            S result = xtl::make_sequence<S>(strides.size(), 0);
+            if (l == layout_type::row_major)
+            {
+                for (size_type i = 0; i < strides.size(); ++i)
+                {
+                    value_type str = strides[i];
+                    value_type quot = str != 0 ? idx / str : 0;
+                    idx = str != 0 ? idx % str : idx;
+                    result[i] = quot;
+                }
+            }
+            else
+            {
+                for (size_type i = strides.size(); i != 0; --i)
+                {
+                    value_type str = strides[i - 1];
+                    value_type quot = str != 0 ? idx / str : 0;
+                    idx = str != 0 ? idx % str : idx;
+                    result[i - 1] = quot;
+                }
+            }
+            return result;
+        }
+    }
+
+    // Convert a flat index to a multi-index for the given strides; only
+    // row- and column-major layouts are supported, other layouts throw.
+    template <class S>
+    inline S unravel_from_strides(typename S::value_type index, const S& strides, layout_type l)
+    {
+        if (l != layout_type::row_major && l != layout_type::column_major)
+        {
+            XTENSOR_THROW(std::runtime_error, "unravel_index: dynamic layout not supported");
+        }
+        return detail::unravel_noexcept(index, strides, l);
+    }
+
+    // Inverse operation: flatten a multi-index into a linear offset using
+    // the given strides.
+    template <class S, class T>
+    inline get_value_type_t<T> ravel_from_strides(const T& index, const S& strides)
+    {
+        return element_offset<get_value_type_t<T>>(strides, index.begin(), index.end());
+    }
+
+    // Convert a flat index into a multi-index for the given shape and layout
+    // (strides are computed internally from the shape).
+    template <class S>
+    inline get_strides_t<S> unravel_index(typename S::value_type index, const S& shape, layout_type l)
+    {
+        using strides_type = get_strides_t<S>;
+        using strides_value_type = typename strides_type::value_type;
+        strides_type strides = xtl::make_sequence<strides_type>(shape.size(), 0);
+        compute_strides(shape, l, strides);
+        return unravel_from_strides(static_cast<strides_value_type>(index), strides, l);
+    }
+
+    // Batched unravel_index: convert each flat index of idx into a
+    // multi-index, reusing a single strides computation for the whole batch.
+    template <class S, class T>
+    inline std::vector<get_strides_t<S>> unravel_indices(const T& idx, const S& shape, layout_type l)
+    {
+        using strides_type = get_strides_t<S>;
+        using strides_value_type = typename strides_type::value_type;
+        strides_type strides = xtl::make_sequence<strides_type>(shape.size(), 0);
+        compute_strides(shape, l, strides);
+        std::vector<get_strides_t<S>> out(idx.size());
+        auto out_iter = out.begin();
+        auto idx_iter = idx.begin();
+        for (; out_iter != out.end(); ++out_iter, ++idx_iter)
+        {
+            *out_iter = unravel_from_strides(static_cast<strides_value_type>(*idx_iter), strides, l);
+        }
+        return out;
+    }
+
+    // Flatten a multi-index into a linear index for the given shape/layout.
+    template <class S, class T>
+    inline get_value_type_t<T> ravel_index(const T& index, const S& shape, layout_type l)
+    {
+        using strides_type = get_strides_t<S>;
+        strides_type strides = xtl::make_sequence<strides_type>(shape.size(), 0);
+        compute_strides(shape, l, strides);
+        return ravel_from_strides(index, strides);
+    }
+
+    // Build a shape of the given size whose entries are all set to the
+    // maximum representable value — the sentinel broadcast_shape() uses to
+    // recognize a not-yet-broadcast output shape.
+    template <class S, class stype>
+    inline S uninitialized_shape(stype size)
+    {
+        using value_type = typename S::value_type;
+        using size_type = typename S::size_type;
+        return xtl::make_sequence<S>(static_cast<size_type>(size), std::numeric_limits<value_type>::max());
+    }
+
+    // Broadcast `input` into `output` in place (right-aligned, NumPy rules).
+    // Returns true when the broadcast is trivial (shapes effectively equal);
+    // throws via throw_broadcast_error on incompatible shapes.
+    template <class S1, class S2>
+    inline bool broadcast_shape(const S1& input, S2& output)
+    {
+        bool trivial_broadcast = (input.size() == output.size());
+        // Indices are faster than reverse iterators
+        using value_type = typename S2::value_type;
+        auto output_index = output.size();
+        auto input_index = input.size();
+
+        if (output_index < input_index)
+        {
+            throw_broadcast_error(output, input);
+        }
+        for (; input_index != 0; --input_index, --output_index)
+        {
+            // First case: output = (MAX, MAX, ...., MAX)
+            // output is a new shape that has not been through
+            // the broadcast process yet; broadcast is trivial
+            if (output[output_index - 1] == std::numeric_limits<value_type>::max())
+            {
+                output[output_index - 1] = static_cast<value_type>(input[input_index - 1]);
+            }
+            // Second case: output has been initialized to 1. Broadcast is trivial
+            // only if input is 1 too.
+            else if (output[output_index - 1] == 1)
+            {
+                output[output_index - 1] = static_cast<value_type>(input[input_index - 1]);
+                trivial_broadcast = trivial_broadcast && (input[input_index - 1] == 1);
+            }
+            // Third case: output has been initialized to something different from 1.
+            // if input is 1, then the broadcast is not trivial
+            else if (input[input_index - 1] == 1)
+            {
+                trivial_broadcast = false;
+            }
+            // Last case: input and output must have the same value, else
+            // shape are not compatible and an exception is thrown
+            else if (static_cast<value_type>(input[input_index - 1]) != output[output_index - 1])
+            {
+                throw_broadcast_error(output, input);
+            }
+        }
+        return trivial_broadcast;
+    }
+
+    // Non-throwing check that src_shape can broadcast into dst_shape:
+    // compared right-aligned, each source extent must equal the destination
+    // extent or be 1, and the destination must have at least as many axes.
+    template <class S1, class S2>
+    inline bool broadcastable(const S1& src_shape, const S2& dst_shape)
+    {
+        auto src_iter = src_shape.crbegin();
+        auto dst_iter = dst_shape.crbegin();
+        bool res = dst_shape.size() >= src_shape.size();
+        for (; src_iter != src_shape.crend() && res; ++src_iter, ++dst_iter)
+        {
+            res = (static_cast<std::size_t>(*src_iter) == static_cast<std::size_t>(*dst_iter))
+                  || (*src_iter == 1);
+        }
+        return res;
+    }
+
+    // Row-major: compare the two stride sequences right-aligned and return
+    // the number of leading s1 entries before the first trailing match run.
+    template <>
+    struct check_strides_overlap<layout_type::row_major>
+    {
+        template <class S1, class S2>
+        static std::size_t get(const S1& s1, const S2& s2)
+        {
+            using value_type = typename S1::value_type;
+            // Indices are faster than reverse iterators
+            auto s1_index = s1.size();
+            auto s2_index = s2.size();
+
+            for (; s2_index != 0; --s1_index, --s2_index)
+            {
+                if (static_cast<value_type>(s1[s1_index - 1]) != static_cast<value_type>(s2[s2_index - 1]))
+                {
+                    break;
+                }
+            }
+            return s1_index;
+        }
+    };
+
+    // Column-major: compare the stride sequences left-aligned and return the
+    // length of the common prefix; sequences of different size never overlap
+    // because broadcasting is still performed in a row-major fashion.
+    template <>
+    struct check_strides_overlap<layout_type::column_major>
+    {
+        template <class S1, class S2>
+        static std::size_t get(const S1& s1, const S2& s2)
+        {
+            // Indices are faster than reverse iterators
+            using size_type = typename S1::size_type;
+            using value_type = typename S1::value_type;
+            size_type index = 0;
+
+            // This check is necessary as column major "broadcasting" is still
+            // performed in a row major fashion
+            if (s1.size() != s2.size())
+            {
+                return 0;
+            }
+
+            auto size = s2.size();
+
+            for (; index < size; ++index)
+            {
+                if (static_cast<value_type>(s1[index]) != static_cast<value_type>(s2[index]))
+                {
+                    break;
+                }
+            }
+            return index;
+        }
+    };
+
+    namespace detail
+    {
+        // Terminal case: every index checked so far was in bounds.
+        template <class S, std::size_t dim>
+        inline bool check_in_bounds_impl(const S&)
+        {
+            return true;
+        }
+
+        // Terminal case: a trailing xt::missing placeholder is always valid.
+        template <class S, std::size_t dim>
+        inline bool check_in_bounds_impl(const S&, missing_type)
+        {
+            return true;
+        }
+
+        // Recursive case: extra leading arguments (more indices than axes)
+        // are skipped; otherwise the current index must lie in [0, shape[dim]).
+        template <class S, std::size_t dim, class T, class... Args>
+        inline bool check_in_bounds_impl(const S& shape, T& arg, Args&... args)
+        {
+            if (sizeof...(Args) + 1 > shape.size())
+            {
+                return check_in_bounds_impl<S, dim>(shape, args...);
+            }
+            else
+            {
+                return arg >= T(0) && arg < static_cast<T>(shape[dim])
+                       && check_in_bounds_impl<S, dim + 1>(shape, args...);
+            }
+        }
+    }
+
+    // True when every index in args is within bounds for the given shape.
+    template <class S, class... Args>
+    inline bool check_in_bounds(const S& shape, Args&... args)
+    {
+        return detail::check_in_bounds_impl<S, 0>(shape, args...);
+    }
+
+    namespace detail
+    {
+        // Terminal case: no indices left to normalize.
+        template <class S, std::size_t dim>
+        inline void normalize_periodic_impl(const S&)
+        {
+        }
+
+        // Terminal case: a trailing xt::missing placeholder is left untouched.
+        template <class S, std::size_t dim>
+        inline void normalize_periodic_impl(const S&, missing_type)
+        {
+        }
+
+        // Recursive case: extra leading arguments are skipped; otherwise wrap
+        // the (possibly negative) index into [0, shape[dim]) in place.
+        template <class S, std::size_t dim, class T, class... Args>
+        inline void normalize_periodic_impl(const S& shape, T& arg, Args&... args)
+        {
+            if (sizeof...(Args) + 1 > shape.size())
+            {
+                normalize_periodic_impl<S, dim>(shape, args...);
+            }
+            else
+            {
+                T n = static_cast<T>(shape[dim]);
+                arg = (n + (arg % n)) % n;
+                normalize_periodic_impl<S, dim + 1>(shape, args...);
+            }
+        }
+    }
+
+    // Wrap each index in args periodically into the valid range of the
+    // corresponding axis, after validating the number of dimensions.
+    template <class S, class... Args>
+    inline void normalize_periodic(const S& shape, Args&... args)
+    {
+        check_dimension(shape, args...);
+        detail::normalize_periodic_impl<S, 0>(shape, args...);
+    }
+}
+
+#endif

+ 983 - 0
3rd/numpy/include/xtensor/xtensor.hpp

@@ -0,0 +1,983 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_TENSOR_HPP
+#define XTENSOR_TENSOR_HPP
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <utility>
+#include <vector>
+
+#include "xbuffer_adaptor.hpp"
+#include "xcontainer.hpp"
+#include "xsemantic.hpp"
+
+namespace xt
+{
+
+    /***********************
+     * xtensor declaration *
+     ***********************/
+
+    namespace extension
+    {
+        // Extension-point base class selected by expression tag; the default
+        // xtensor_expression_tag contributes an empty base.
+        template <class EC, std::size_t N, layout_type L, class Tag>
+        struct xtensor_container_base;
+
+        template <class EC, std::size_t N, layout_type L>
+        struct xtensor_container_base<EC, N, L, xtensor_expression_tag>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class EC, std::size_t N, layout_type L, class Tag>
+        using xtensor_container_base_t = typename xtensor_container_base<EC, N, L, Tag>::type;
+    }
+
+    // Inner type definitions for xtensor_container: shape/strides are
+    // fixed-rank std::array<…, N> since the dimension is known statically.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    struct xcontainer_inner_types<xtensor_container<EC, N, L, Tag>>
+    {
+        using storage_type = EC;
+        using reference = inner_reference_t<storage_type>;
+        using const_reference = typename storage_type::const_reference;
+        using size_type = typename storage_type::size_type;
+        using shape_type = std::array<typename storage_type::size_type, N>;
+        using strides_type = get_strides_t<shape_type>;
+        using backstrides_type = get_strides_t<shape_type>;
+        using inner_shape_type = shape_type;
+        using inner_strides_type = strides_type;
+        using inner_backstrides_type = backstrides_type;
+        using temporary_type = xtensor_container<EC, N, L, Tag>;
+        static constexpr layout_type layout = L;
+    };
+
+    // Iterable type definitions: reuse the generic container iterable types.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    struct xiterable_inner_types<xtensor_container<EC, N, L, Tag>>
+        : xcontainer_iterable_types<xtensor_container<EC, N, L, Tag>>
+    {
+    };
+
+    /**
+     * @class xtensor_container
+     * @brief Dense multidimensional container with tensor semantic and fixed
+     * dimension.
+     *
+     * The xtensor_container class implements a dense multidimensional container
+     * with tensor semantics and fixed dimension
+     *
+     * @tparam EC The type of the container holding the elements.
+     * @tparam N The dimension of the container.
+     * @tparam L The layout_type of the tensor.
+     * @tparam Tag The expression tag.
+     * @sa xtensor, xstrided_container, xcontainer
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    class xtensor_container : public xstrided_container<xtensor_container<EC, N, L, Tag>>,
+                              public xcontainer_semantic<xtensor_container<EC, N, L, Tag>>,
+                              public extension::xtensor_container_base_t<EC, N, L, Tag>
+    {
+    public:
+
+        using self_type = xtensor_container<EC, N, L, Tag>;
+        using base_type = xstrided_container<self_type>;
+        using semantic_base = xcontainer_semantic<self_type>;
+        using extension_base = extension::xtensor_container_base_t<EC, N, L, Tag>;
+        using storage_type = typename base_type::storage_type;
+        using allocator_type = typename base_type::allocator_type;
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using const_reference = typename base_type::const_reference;
+        using pointer = typename base_type::pointer;
+        using const_pointer = typename base_type::const_pointer;
+        using shape_type = typename base_type::shape_type;
+        using inner_shape_type = typename base_type::inner_shape_type;
+        using strides_type = typename base_type::strides_type;
+        using backstrides_type = typename base_type::backstrides_type;
+        using inner_backstrides_type = typename base_type::inner_backstrides_type;
+        using inner_strides_type = typename base_type::inner_strides_type;
+        using temporary_type = typename semantic_base::temporary_type;
+        using expression_tag = Tag;
+        // Static rank of the tensor (the template parameter N).
+        static constexpr std::size_t rank = N;
+
+        // Constructors: default, nested initializer list, shape (with
+        // optional fill value / explicit strides), and storage adoption.
+        xtensor_container();
+        xtensor_container(nested_initializer_list_t<value_type, N> t);
+        explicit xtensor_container(const shape_type& shape, layout_type l = L);
+        explicit xtensor_container(const shape_type& shape, const_reference value, layout_type l = L);
+        explicit xtensor_container(const shape_type& shape, const strides_type& strides);
+        explicit xtensor_container(const shape_type& shape, const strides_type& strides, const_reference value);
+        explicit xtensor_container(storage_type&& storage, inner_shape_type&& shape, inner_strides_type&& strides);
+
+        template <class S = shape_type>
+        static xtensor_container from_shape(S&& s);
+
+        ~xtensor_container() = default;
+
+        xtensor_container(const xtensor_container&) = default;
+        xtensor_container& operator=(const xtensor_container&) = default;
+
+        xtensor_container(xtensor_container&&) = default;
+        xtensor_container& operator=(xtensor_container&&) = default;
+
+        // Move construction/assignment from a dynamic-rank xarray_container.
+        template <class SC>
+        explicit xtensor_container(xarray_container<EC, L, SC, Tag>&&);
+        template <class SC>
+        xtensor_container& operator=(xarray_container<EC, L, SC, Tag>&&);
+
+        // Extended copy semantics: construction/assignment from any
+        // xexpression.
+        template <class E>
+        xtensor_container(const xexpression<E>& e);
+
+        template <class E>
+        xtensor_container& operator=(const xexpression<E>& e);
+
+    private:
+
+        storage_type m_storage;
+
+        // Storage accessors used by the xcontainer CRTP base.
+        storage_type& storage_impl() noexcept;
+        const storage_type& storage_impl() const noexcept;
+
+        friend class xcontainer<xtensor_container<EC, N, L, Tag>>;
+    };
+
+    /*****************************************
+     * xtensor_container_adaptor declaration *
+     *****************************************/
+
+    namespace extension
+    {
+        // Extension-point base class selected by expression tag; the default
+        // xtensor_expression_tag contributes an empty base.
+        template <class EC, std::size_t N, layout_type L, class Tag>
+        struct xtensor_adaptor_base;
+
+        template <class EC, std::size_t N, layout_type L>
+        struct xtensor_adaptor_base<EC, N, L, xtensor_expression_tag>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class EC, std::size_t N, layout_type L, class Tag>
+        using xtensor_adaptor_base_t = typename xtensor_adaptor_base<EC, N, L, Tag>::type;
+    }
+
+    // Inner type definitions for xtensor_adaptor: EC is a closure type
+    // (possibly a reference), so the storage type strips the reference, and
+    // temporaries materialize into an owning xtensor_container.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    struct xcontainer_inner_types<xtensor_adaptor<EC, N, L, Tag>>
+    {
+        using storage_type = std::remove_reference_t<EC>;
+        using reference = inner_reference_t<storage_type>;
+        using const_reference = typename storage_type::const_reference;
+        using size_type = typename storage_type::size_type;
+        using shape_type = std::array<typename storage_type::size_type, N>;
+        using strides_type = get_strides_t<shape_type>;
+        using backstrides_type = get_strides_t<shape_type>;
+        using inner_shape_type = shape_type;
+        using inner_strides_type = strides_type;
+        using inner_backstrides_type = backstrides_type;
+        using temporary_type = xtensor_container<temporary_container_t<storage_type>, N, L, Tag>;
+        static constexpr layout_type layout = L;
+    };
+
+    // Iterable type definitions: reuse the generic container iterable types.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    struct xiterable_inner_types<xtensor_adaptor<EC, N, L, Tag>>
+        : xcontainer_iterable_types<xtensor_adaptor<EC, N, L, Tag>>
+    {
+    };
+
+    /**
+     * @class xtensor_adaptor
+     * @brief Dense multidimensional container adaptor with tensor
+     * semantics and fixed dimension.
+     *
+     * The xtensor_adaptor class implements a dense multidimensional
+     * container adaptor with tensor semantics and fixed dimension. It
+     * is used to provide a multidimensional container semantic and a
+     * tensor semantic to stl-like containers.
+     *
+     * @tparam EC The closure for the container type to adapt.
+     * @tparam N The dimension of the adaptor.
+     * @tparam L The layout_type of the adaptor.
+     * @tparam Tag The expression tag.
+     * @sa xstrided_container, xcontainer
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    class xtensor_adaptor : public xstrided_container<xtensor_adaptor<EC, N, L, Tag>>,
+                            public xcontainer_semantic<xtensor_adaptor<EC, N, L, Tag>>,
+                            public extension::xtensor_adaptor_base_t<EC, N, L, Tag>
+    {
+    public:
+
+        using container_closure_type = EC;
+
+        using self_type = xtensor_adaptor<EC, N, L, Tag>;
+        using base_type = xstrided_container<self_type>;
+        using semantic_base = xcontainer_semantic<self_type>;
+        using extension_base = extension::xtensor_adaptor_base_t<EC, N, L, Tag>;
+        using storage_type = typename base_type::storage_type;
+        using allocator_type = typename base_type::allocator_type;
+        using shape_type = typename base_type::shape_type;
+        using strides_type = typename base_type::strides_type;
+        using backstrides_type = typename base_type::backstrides_type;
+        using temporary_type = typename semantic_base::temporary_type;
+        using expression_tag = Tag;
+        // Compile-time rank of the adaptor.
+        static constexpr std::size_t rank = N;
+
+        xtensor_adaptor(storage_type&& storage);
+        xtensor_adaptor(const storage_type& storage);
+
+        template <class D>
+        xtensor_adaptor(D&& storage, const shape_type& shape, layout_type l = L);
+
+        template <class D>
+        xtensor_adaptor(D&& storage, const shape_type& shape, const strides_type& strides);
+
+        ~xtensor_adaptor() = default;
+
+        // Copy/move constructors are defaulted; the assignment operators are
+        // user-provided so the strided base state is propagated explicitly.
+        xtensor_adaptor(const xtensor_adaptor&) = default;
+        xtensor_adaptor& operator=(const xtensor_adaptor&);
+
+        xtensor_adaptor(xtensor_adaptor&&) = default;
+        xtensor_adaptor& operator=(xtensor_adaptor&&);
+        xtensor_adaptor& operator=(temporary_type&&);
+
+        template <class E>
+        xtensor_adaptor& operator=(const xexpression<E>& e);
+
+        // Rebinds the underlying buffer; only usable when the adapted
+        // storage exposes reset_data (e.g. xbuffer_adaptor) — see definition.
+        template <class P, class S>
+        void reset_buffer(P&& pointer, S&& size);
+
+    private:
+
+        container_closure_type m_storage;
+
+        storage_type& storage_impl() noexcept;
+        const storage_type& storage_impl() const noexcept;
+
+        // xcontainer reaches storage through storage_impl via CRTP.
+        friend class xcontainer<xtensor_adaptor<EC, N, L, Tag>>;
+    };
+
+    /****************************
+     * xtensor_view declaration *
+     ****************************/
+
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    class xtensor_view;
+
+    namespace extension
+    {
+        // Selects the extension base injected into xtensor_view; expression
+        // tags other than xtensor_expression_tag specialize this elsewhere.
+        template <class EC, std::size_t N, layout_type L, class Tag>
+        struct xtensor_view_base;
+
+        // Default: plain tensor expressions need no extra behavior.
+        template <class EC, std::size_t N, layout_type L>
+        struct xtensor_view_base<EC, N, L, xtensor_expression_tag>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class EC, std::size_t N, layout_type L, class Tag>
+        using xtensor_view_base_t = typename xtensor_view_base<EC, N, L, Tag>::type;
+    }
+
+    // Traits for xtensor_view: identical layout to the adaptor traits above —
+    // fixed-rank std::array shape, strides derived from the shape type, and an
+    // owning xtensor_container as the temporary used during assignment.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    struct xcontainer_inner_types<xtensor_view<EC, N, L, Tag>>
+    {
+        // EC may be a reference closure; remove it to get the real container.
+        using storage_type = std::remove_reference_t<EC>;
+        using reference = inner_reference_t<storage_type>;
+        using const_reference = typename storage_type::const_reference;
+        using size_type = typename storage_type::size_type;
+        using shape_type = std::array<typename storage_type::size_type, N>;
+        using strides_type = get_strides_t<shape_type>;
+        using backstrides_type = get_strides_t<shape_type>;
+        using inner_shape_type = shape_type;
+        using inner_strides_type = strides_type;
+        using inner_backstrides_type = backstrides_type;
+        using temporary_type = xtensor_container<temporary_container_t<storage_type>, N, L, Tag>;
+        static constexpr layout_type layout = L;
+    };
+
+    // Iterable traits inherited from the generic container case.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    struct xiterable_inner_types<xtensor_view<EC, N, L, Tag>>
+        : xcontainer_iterable_types<xtensor_view<EC, N, L, Tag>>
+    {
+    };
+
+    /**
+     * @class xtensor_view
+     * @brief Dense multidimensional container adaptor with view
+     * semantics and fixed dimension.
+     *
+     * The xtensor_view class implements a dense multidimensional
+     * container adaptor with view semantics and fixed dimension. It
+     * is used to provide a multidimensional container semantic and a
+     * view semantic to stl-like containers.
+     *
+     * @tparam EC The closure for the container type to adapt.
+     * @tparam N The dimension of the view.
+     * @tparam L The layout_type of the view.
+     * @tparam Tag The expression tag.
+     * @sa xstrided_container, xcontainer
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    class xtensor_view : public xstrided_container<xtensor_view<EC, N, L, Tag>>,
+                         public xview_semantic<xtensor_view<EC, N, L, Tag>>,
+                         public extension::xtensor_view_base_t<EC, N, L, Tag>
+    {
+    public:
+
+        using container_closure_type = EC;
+
+        using self_type = xtensor_view<EC, N, L, Tag>;
+        using base_type = xstrided_container<self_type>;
+        using semantic_base = xview_semantic<self_type>;
+        // Fixed: this alias previously named extension::xtensor_adaptor_base_t,
+        // which is inconsistent with the extension base this class actually
+        // inherits (extension::xtensor_view_base_t, see the base list above).
+        using extension_base = extension::xtensor_view_base_t<EC, N, L, Tag>;
+        using storage_type = typename base_type::storage_type;
+        using allocator_type = typename base_type::allocator_type;
+        using shape_type = typename base_type::shape_type;
+        using strides_type = typename base_type::strides_type;
+        using backstrides_type = typename base_type::backstrides_type;
+        using temporary_type = typename semantic_base::temporary_type;
+        using expression_tag = Tag;
+
+        xtensor_view(storage_type&& storage);
+        xtensor_view(const storage_type& storage);
+
+        template <class D>
+        xtensor_view(D&& storage, const shape_type& shape, layout_type l = L);
+
+        template <class D>
+        xtensor_view(D&& storage, const shape_type& shape, const strides_type& strides);
+
+        ~xtensor_view() = default;
+
+        xtensor_view(const xtensor_view&) = default;
+        xtensor_view& operator=(const xtensor_view&);
+
+        xtensor_view(xtensor_view&&) = default;
+        xtensor_view& operator=(xtensor_view&&);
+
+        // Assignment from any xexpression (view semantics: assigns through
+        // to the adapted storage, shape must already match).
+        template <class E>
+        self_type& operator=(const xexpression<E>& e);
+
+        // Broadcasting scalar assignment; disabled for xexpression arguments.
+        template <class E>
+        disable_xexpression<E, self_type>& operator=(const E& e);
+
+    private:
+
+        container_closure_type m_storage;
+
+        storage_type& storage_impl() noexcept;
+        const storage_type& storage_impl() const noexcept;
+
+        void assign_temporary_impl(temporary_type&& tmp);
+
+        friend class xcontainer<xtensor_view<EC, N, L, Tag>>;
+        friend class xview_semantic<xtensor_view<EC, N, L, Tag>>;
+    };
+
+    namespace detail
+    {
+        // xtensor_view supports SIMD only when the adapted container both
+        // implements the SIMD interface for its value_type and yields true
+        // lvalue references from its access operator.
+        template <class V>
+        struct tensor_view_simd_helper
+        {
+            using valid_return_type = detail::has_simd_interface_impl<V, typename V::value_type>;
+            using valid_reference = std::is_lvalue_reference<typename V::reference>;
+            static constexpr bool value = valid_return_type::value && valid_reference::value;
+            using type = std::integral_constant<bool, value>;
+        };
+    }
+
+    // xtensor_view can be used on pseudo containers, i.e. containers
+    // whose access operator does not return a reference. Since it
+    // is not possible to take the address of a temporary, the load_simd
+    // method implementation leads to a compilation error.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    struct has_simd_interface<xtensor_view<EC, N, L, Tag>>
+        : detail::tensor_view_simd_helper<xtensor_view<EC, N, L, Tag>>::type
+    {
+    };
+
+    /************************************
+     * xtensor_container implementation *
+     ************************************/
+
+    /**
+     * @name Constructors
+     */
+    //@{
+    /**
+     * Allocates an uninitialized xtensor_container that holds 0 elements.
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_container<EC, N, L, Tag>::xtensor_container()
+        : base_type()
+        // A 0-dimensional tensor still holds exactly one (scalar) element.
+        , m_storage(N == 0 ? 1 : 0, value_type())
+    {
+    }
+
+    /**
+     * Allocates an xtensor_container with nested initializer lists.
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_container<EC, N, L, Tag>::xtensor_container(nested_initializer_list_t<value_type, N> t)
+        : base_type()
+    {
+        base_type::resize(xt::shape<shape_type>(t), true);
+        // Initializer lists are row-major by construction: copy straight into
+        // storage when L is row-major, otherwise go through a row-major iterator.
+        constexpr auto tmp = layout_type::row_major;
+        L == tmp ? nested_copy(m_storage.begin(), t) : nested_copy(this->template begin<tmp>(), t);
+    }
+
+    /**
+     * Allocates an uninitialized xtensor_container with the specified shape and
+     * layout_type.
+     * @param shape the shape of the xtensor_container
+     * @param l the layout_type of the xtensor_container
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_container<EC, N, L, Tag>::xtensor_container(const shape_type& shape, layout_type l)
+        : base_type()
+    {
+        base_type::resize(shape, l);
+    }
+
+    /**
+     * Allocates an xtensor_container with the specified shape and layout_type. Elements
+     * are initialized to the specified value.
+     * @param shape the shape of the xtensor_container
+     * @param value the value of the elements
+     * @param l the layout_type of the xtensor_container
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_container<EC, N, L, Tag>::xtensor_container(
+        const shape_type& shape,
+        const_reference value,
+        layout_type l
+    )
+        : base_type()
+    {
+        base_type::resize(shape, l);
+        std::fill(m_storage.begin(), m_storage.end(), value);
+    }
+
+    /**
+     * Allocates an uninitialized xtensor_container with the specified shape and strides.
+     * @param shape the shape of the xtensor_container
+     * @param strides the strides of the xtensor_container
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_container<EC, N, L, Tag>::xtensor_container(const shape_type& shape, const strides_type& strides)
+        : base_type()
+    {
+        base_type::resize(shape, strides);
+    }
+
+    /**
+     * Allocates an uninitialized xtensor_container with the specified shape and strides.
+     * Elements are initialized to the specified value.
+     * @param shape the shape of the xtensor_container
+     * @param strides the strides of the xtensor_container
+     * @param value the value of the elements
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_container<EC, N, L, Tag>::xtensor_container(
+        const shape_type& shape,
+        const strides_type& strides,
+        const_reference value
+    )
+        : base_type()
+    {
+        base_type::resize(shape, strides);
+        std::fill(m_storage.begin(), m_storage.end(), value);
+    }
+
+    /**
+     * Allocates an xtensor_container by moving specified data, shape and strides
+     *
+     * @param storage the data for the xtensor_container
+     * @param shape the shape of the xtensor_container
+     * @param strides the strides of the xtensor_container
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_container<EC, N, L, Tag>::xtensor_container(
+        storage_type&& storage,
+        inner_shape_type&& shape,
+        inner_strides_type&& strides
+    )
+        : base_type(std::move(shape), std::move(strides))
+        , m_storage(std::move(storage))
+    {
+    }
+
+    /**
+     * Move-constructs an xtensor_container from a dynamically-shaped
+     * xarray_container, forwarding its shape, strides, backstrides, layout
+     * and storage. The dimension of rhs must equal N (not asserted here;
+     * see the corresponding move-assignment operator).
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class SC>
+    inline xtensor_container<EC, N, L, Tag>::xtensor_container(xarray_container<EC, L, SC, Tag>&& rhs)
+        : base_type(
+            xtl::forward_sequence<inner_shape_type, decltype(rhs.shape())>(rhs.shape()),
+            xtl::forward_sequence<inner_strides_type, decltype(rhs.strides())>(rhs.strides()),
+            xtl::forward_sequence<inner_backstrides_type, decltype(rhs.backstrides())>(rhs.backstrides()),
+            std::move(rhs.layout())
+        )
+        , m_storage(std::move(rhs.storage()))
+    {
+    }
+
+    /**
+     * Move-assigns a dynamically-shaped xarray_container into this fixed-rank
+     * tensor. The runtime dimension of rhs must equal the static rank N.
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class SC>
+    inline xtensor_container<EC, N, L, Tag>&
+    xtensor_container<EC, N, L, Tag>::operator=(xarray_container<EC, L, SC, Tag>&& rhs)
+    {
+        XTENSOR_ASSERT_MSG(N == rhs.dimension(), "Cannot change dimension of xtensor.");
+        // Shape/strides containers differ in type (svector vs std::array),
+        // so the metadata is copied element-wise rather than moved.
+        std::copy(rhs.shape().begin(), rhs.shape().end(), this->shape_impl().begin());
+        std::copy(rhs.strides().cbegin(), rhs.strides().cend(), this->strides_impl().begin());
+        std::copy(rhs.backstrides().cbegin(), rhs.backstrides().cend(), this->backstrides_impl().begin());
+        this->mutable_layout() = std::move(rhs.layout());
+        // Fixed: was `std::move(std::move(rhs.storage()))` — the nested
+        // std::move was redundant; a single rvalue cast is sufficient.
+        m_storage = std::move(rhs.storage());
+        return *this;
+    }
+
+    /**
+     * Builds an xtensor_container from any shape-like sequence; the sequence
+     * length must equal the static rank N.
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class S>
+    inline xtensor_container<EC, N, L, Tag> xtensor_container<EC, N, L, Tag>::from_shape(S&& s)
+    {
+        XTENSOR_ASSERT_MSG(s.size() == N, "Cannot change dimension of xtensor.");
+        shape_type shape = xtl::forward_sequence<shape_type, S>(s);
+        return self_type(shape);
+    }
+
+    //@}
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended copy constructor.
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class E>
+    inline xtensor_container<EC, N, L, Tag>::xtensor_container(const xexpression<E>& e)
+        : base_type()
+    {
+        XTENSOR_ASSERT_MSG(N == e.derived_cast().dimension(), "Cannot change dimension of xtensor.");
+        // Avoids uninitialized data because of (m_shape == shape) condition
+        // in resize (called by assign), which is always true when dimension() == 0.
+        if (e.derived_cast().dimension() == 0)
+        {
+            detail::resize_data_container(m_storage, std::size_t(1));
+        }
+        semantic_base::assign(e);
+    }
+
+    /**
+     * The extended assignment operator.
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class E>
+    inline auto xtensor_container<EC, N, L, Tag>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        // Delegates to xcontainer_semantic, which resizes and evaluates e.
+        return semantic_base::operator=(e);
+    }
+
+    //@}
+
+    // Storage accessors used by the xcontainer CRTP base.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_container<EC, N, L, Tag>::storage_impl() noexcept -> storage_type&
+    {
+        return m_storage;
+    }
+
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_container<EC, N, L, Tag>::storage_impl() const noexcept -> const storage_type&
+    {
+        return m_storage;
+    }
+
+    /**********************************
+     * xtensor_adaptor implementation *
+     **********************************/
+
+    /**
+     * @name Constructors
+     */
+    //@{
+    /**
+     * Constructs an xtensor_adaptor of the given stl-like container.
+     * @param storage the container to adapt
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_adaptor<EC, N, L, Tag>::xtensor_adaptor(storage_type&& storage)
+        : base_type()
+        , m_storage(std::move(storage))
+    {
+    }
+
+    /**
+     * Constructs an xtensor_adaptor of the given stl-like container.
+     * @param storage the container to adapt
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_adaptor<EC, N, L, Tag>::xtensor_adaptor(const storage_type& storage)
+        : base_type()
+        , m_storage(storage)
+    {
+    }
+
+    /**
+     * Constructs an xtensor_adaptor of the given stl-like container,
+     * with the specified shape and layout_type.
+     * @param storage the container to adapt
+     * @param shape the shape of the xtensor_adaptor
+     * @param l the layout_type of the xtensor_adaptor
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class D>
+    inline xtensor_adaptor<EC, N, L, Tag>::xtensor_adaptor(D&& storage, const shape_type& shape, layout_type l)
+        : base_type()
+        // D is a universal reference so the closure can bind either an owned
+        // container or a reference to an external one.
+        , m_storage(std::forward<D>(storage))
+    {
+        base_type::resize(shape, l);
+    }
+
+    /**
+     * Constructs an xtensor_adaptor of the given stl-like container,
+     * with the specified shape and strides.
+     * @param storage the container to adapt
+     * @param shape the shape of the xtensor_adaptor
+     * @param strides the strides of the xtensor_adaptor
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class D>
+    inline xtensor_adaptor<EC, N, L, Tag>::xtensor_adaptor(
+        D&& storage,
+        const shape_type& shape,
+        const strides_type& strides
+    )
+        : base_type()
+        , m_storage(std::forward<D>(storage))
+    {
+        base_type::resize(shape, strides);
+    }
+
+    //@}
+
+    // Copy assignment: propagates shape/stride state through the base, then
+    // copies the adapted storage (or its contents when EC is a reference).
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_adaptor<EC, N, L, Tag>::operator=(const xtensor_adaptor& rhs) -> self_type&
+    {
+        base_type::operator=(rhs);
+        m_storage = rhs.m_storage;
+        return *this;
+    }
+
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_adaptor<EC, N, L, Tag>::operator=(xtensor_adaptor&& rhs) -> self_type&
+    {
+        base_type::operator=(std::move(rhs));
+        // NOTE(review): m_storage is copied, not moved — presumably deliberate
+        // because EC may be a reference closure to an external container whose
+        // data must not be stolen; confirm before changing to std::move.
+        m_storage = rhs.m_storage;
+        return *this;
+    }
+
+    // Assignment from the owning temporary produced by the container
+    // semantic. The const_casts strip the const from the temporary's metadata
+    // accessors so it can be moved from; rhs is expiring, so this is accepted.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_adaptor<EC, N, L, Tag>::operator=(temporary_type&& rhs) -> self_type&
+    {
+        base_type::shape_impl() = std::move(const_cast<shape_type&>(rhs.shape()));
+        base_type::strides_impl() = std::move(const_cast<strides_type&>(rhs.strides()));
+        base_type::backstrides_impl() = std::move(const_cast<backstrides_type&>(rhs.backstrides()));
+        m_storage = std::move(rhs.storage());
+        return *this;
+    }
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended assignment operator.
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class E>
+    inline auto xtensor_adaptor<EC, N, L, Tag>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        return semantic_base::operator=(e);
+    }
+
+    //@}
+
+    // Storage accessors used by the xcontainer CRTP base.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_adaptor<EC, N, L, Tag>::storage_impl() noexcept -> storage_type&
+    {
+        return m_storage;
+    }
+
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_adaptor<EC, N, L, Tag>::storage_impl() const noexcept -> const storage_type&
+    {
+        return m_storage;
+    }
+
+    // Rebinds the adapted buffer to a new pointer/size pair. Compiles only
+    // when storage_type provides reset_data (e.g. xbuffer_adaptor); the
+    // `return` of a void expression is legal and keeps the forwarding terse.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class P, class S>
+    inline void xtensor_adaptor<EC, N, L, Tag>::reset_buffer(P&& pointer, S&& size)
+    {
+        return m_storage.reset_data(std::forward<P>(pointer), std::forward<S>(size));
+    }
+
+    /*******************************
+     * xtensor_view implementation *
+     *******************************/
+
+    /**
+     * @name Constructors
+     */
+    //@{
+    /**
+     * Constructs an xtensor_view of the given stl-like container.
+     * @param storage the container to adapt
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_view<EC, N, L, Tag>::xtensor_view(storage_type&& storage)
+        : base_type()
+        , m_storage(std::move(storage))
+    {
+    }
+
+    /**
+     * Constructs an xtensor_view of the given stl-like container.
+     * @param storage the container to adapt
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline xtensor_view<EC, N, L, Tag>::xtensor_view(const storage_type& storage)
+        : base_type()
+        , m_storage(storage)
+    {
+    }
+
+    /**
+     * Constructs an xtensor_view of the given stl-like container,
+     * with the specified shape and layout_type.
+     * @param storage the container to adapt
+     * @param shape the shape of the xtensor_view
+     * @param l the layout_type of the xtensor_view
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class D>
+    inline xtensor_view<EC, N, L, Tag>::xtensor_view(D&& storage, const shape_type& shape, layout_type l)
+        : base_type()
+        // Universal reference: the closure may own the container or alias it.
+        , m_storage(std::forward<D>(storage))
+    {
+        base_type::resize(shape, l);
+    }
+
+    /**
+     * Constructs an xtensor_view of the given stl-like container,
+     * with the specified shape and strides.
+     * @param storage the container to adapt
+     * @param shape the shape of the xtensor_view
+     * @param strides the strides of the xtensor_view
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class D>
+    inline xtensor_view<EC, N, L, Tag>::xtensor_view(D&& storage, const shape_type& shape, const strides_type& strides)
+        : base_type()
+        , m_storage(std::forward<D>(storage))
+    {
+        base_type::resize(shape, strides);
+    }
+
+    //@}
+
+    // Copy assignment: propagates strided-base state, then copies storage.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_view<EC, N, L, Tag>::operator=(const xtensor_view& rhs) -> self_type&
+    {
+        base_type::operator=(rhs);
+        m_storage = rhs.m_storage;
+        return *this;
+    }
+
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_view<EC, N, L, Tag>::operator=(xtensor_view&& rhs) -> self_type&
+    {
+        base_type::operator=(std::move(rhs));
+        // NOTE(review): storage is copied, not moved — presumably because EC
+        // may alias an external container (same pattern as xtensor_adaptor);
+        // confirm before changing.
+        m_storage = rhs.m_storage;
+        return *this;
+    }
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended assignment operator.
+     */
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class E>
+    inline auto xtensor_view<EC, N, L, Tag>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        // View semantics: xview_semantic assigns through to the adapted
+        // storage without reshaping it.
+        return semantic_base::operator=(e);
+    }
+
+    //@}
+
+    // Broadcasting scalar assignment: fills every element of the adapted
+    // storage with e. Enabled only when E is not an xexpression.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    template <class E>
+    inline auto xtensor_view<EC, N, L, Tag>::operator=(const E& e) -> disable_xexpression<E, self_type>&
+    {
+        std::fill(m_storage.begin(), m_storage.end(), e);
+        return *this;
+    }
+
+    // Storage accessors used by the xcontainer CRTP base.
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_view<EC, N, L, Tag>::storage_impl() noexcept -> storage_type&
+    {
+        return m_storage;
+    }
+
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline auto xtensor_view<EC, N, L, Tag>::storage_impl() const noexcept -> const storage_type&
+    {
+        return m_storage;
+    }
+
+    // Called by xview_semantic: the evaluated temporary is copied back into
+    // the viewed storage element-wise (the view never reallocates).
+    template <class EC, std::size_t N, layout_type L, class Tag>
+    inline void xtensor_view<EC, N, L, Tag>::assign_temporary_impl(temporary_type&& tmp)
+    {
+        std::copy(tmp.cbegin(), tmp.cend(), m_storage.begin());
+    }
+
+    /**
+     * Converts ``std::vector<index_type>`` (returned e.g. from ``xt::argwhere``) to ``xtensor``.
+     *
+     * @param idx vector of indices
+     *
+     * @return ``xt::xtensor<typename index_type::value_type, 2>`` (e.g. ``xt::xtensor<size_t, 2>``)
+     */
+    template <class T>
+    inline auto from_indices(const std::vector<T>& idx)
+    {
+        using return_type = xtensor<typename T::value_type, 2>;
+        using size_type = typename return_type::size_type;
+
+        // Empty input: return an empty 0x0 tensor rather than indexing idx[0].
+        if (idx.size() == 0)
+        {
+            return return_type::from_shape({size_type(0), size_type(0)});
+        }
+
+        // One row per index; all indices are assumed to have idx[0].size()
+        // entries (i.e. the same dimensionality).
+        return_type out = return_type::from_shape({idx.size(), idx[0].size()});
+
+        for (size_type i = 0; i < out.shape()[0]; ++i)
+        {
+            for (size_type j = 0; j < out.shape()[1]; ++j)
+            {
+                out(i, j) = idx[i][j];
+            }
+        }
+
+        return out;
+    }
+
+    /**
+     * Converts ``std::vector<index_type>`` (returned e.g. from ``xt::argwhere``) to a flattened
+     * ``xtensor``.
+     *
+     * @param idx a vector of indices
+     *
+     * @return ``xt::xtensor<typename index_type::value_type, 1>`` (e.g. ``xt::xtensor<size_t, 1>``)
+     */
+    template <class T>
+    inline auto flatten_indices(const std::vector<T>& idx)
+    {
+        // Total output length: number of indices times the entries per index
+        // (all indices are assumed to have idx[0].size() entries).
+        auto n = idx.size();
+        if (n != 0)
+        {
+            n *= idx[0].size();
+        }
+
+        using return_type = xtensor<typename T::value_type, 1>;
+        return_type out = return_type::from_shape({n});
+        // Concatenate every index into the flat output buffer.
+        auto iter = out.begin();
+        for_each(
+            idx.begin(),
+            idx.end(),
+            [&iter](const auto& t)
+            {
+                iter = std::copy(t.cbegin(), t.cend(), iter);
+            }
+        );
+
+        return out;
+    }
+
+    // Tag types selecting the output container of ravel_indices:
+    // std::vector (ravel_vector_tag) or a 1-d xtensor (ravel_tensor_tag).
+    struct ravel_vector_tag;
+    struct ravel_tensor_tag;
+
+    namespace detail
+    {
+        // Maps (index container, tag) to the ravelled result type and
+        // provides a uniform `init(n)` factory for it.
+        template <class C, class Tag>
+        struct ravel_return_type;
+
+        template <class C>
+        struct ravel_return_type<C, ravel_vector_tag>
+        {
+            using index_type = typename C::value_type;
+            using value_type = typename index_type::value_type;
+            using type = std::vector<value_type>;
+
+            template <class T>
+            static std::vector<value_type> init(T n)
+            {
+                return std::vector<value_type>(n);
+            }
+        };
+
+        template <class C>
+        struct ravel_return_type<C, ravel_tensor_tag>
+        {
+            using index_type = typename C::value_type;
+            using value_type = typename index_type::value_type;
+            using type = xt::xtensor<value_type, 1>;
+
+            template <class T>
+            static xt::xtensor<value_type, 1> init(T n)
+            {
+                return xtensor<value_type, 1>::from_shape({n});
+            }
+        };
+    }
+
+    // Convenience alias for the result type of ravel_indices.
+    template <class C, class Tag>
+    using ravel_return_type_t = typename detail::ravel_return_type<C, Tag>::type;
+
+    /**
+     * Converts ``std::vector<index_type>`` (returned e.g. from ``xt::argwhere``) to ``xtensor``
+     * whereby the indices are ravelled. For 1-d input there is no conversion.
+     *
+     * @param idx vector of indices
+     * @param shape the shape of the original array
+     * @param l the layout type (row-major or column-major)
+     *
+     * @return ``xt::xtensor<typename index_type::value_type, 1>`` (e.g. ``xt::xtensor<size_t, 1>``)
+     */
+    template <class Tag = ravel_tensor_tag, class C, class S>
+    ravel_return_type_t<C, Tag>
+    ravel_indices(const C& idx, const S& shape, layout_type l = layout_type::row_major)
+    {
+        using return_type = typename detail::ravel_return_type<C, Tag>::type;
+        using value_type = typename detail::ravel_return_type<C, Tag>::value_type;
+        using strides_type = get_strides_t<S>;
+        // Build the strides of the original array so each multi-index can be
+        // collapsed to a flat offset.
+        strides_type strides = xtl::make_sequence<strides_type>(shape.size(), 0);
+        compute_strides(shape, l, strides);
+        return_type out = detail::ravel_return_type<C, Tag>::init(idx.size());
+        // One ravelled offset per input index.
+        auto out_iter = out.begin();
+        auto idx_iter = idx.begin();
+        for (; out_iter != out.end(); ++out_iter, ++idx_iter)
+        {
+            *out_iter = element_offset<value_type>(strides, (*idx_iter).cbegin(), (*idx_iter).cend());
+        }
+        return out;
+    }
+}
+
+#endif

+ 137 - 0
3rd/numpy/include/xtensor/xtensor_config.hpp

@@ -0,0 +1,137 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_CONFIG_HPP
+#define XTENSOR_CONFIG_HPP
+
+#define XTENSOR_VERSION_MAJOR 0
+#define XTENSOR_VERSION_MINOR 25
+#define XTENSOR_VERSION_PATCH 0
+
+
+// Define if the library is going to be using exceptions.
+#if (!defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND))
+#undef XTENSOR_DISABLE_EXCEPTIONS
+#define XTENSOR_DISABLE_EXCEPTIONS
+#endif
+
+// Exception support.
+// When exceptions are disabled, XTENSOR_THROW prints the message and aborts;
+// the exception type argument is ignored (hence the `_` placeholder).
+#if defined(XTENSOR_DISABLE_EXCEPTIONS)
+#include <iostream>
+#define XTENSOR_THROW(_, msg)          \
+    {                                  \
+        std::cerr << msg << std::endl; \
+        std::abort();                  \
+    }
+#else
+#define XTENSOR_THROW(exception, msg) throw exception(msg)
+#endif
+
+// Workaround for some missing constexpr functionality in MSVC 2015 and MSVC 2017 x86
+#if defined(_MSC_VER)
+#define XTENSOR_CONSTEXPR_ENHANCED const
+// The following must not be defined to const, otherwise
+// it prevents generation of copy operators of classes
+// containing XTENSOR_CONSTEXPR_ENHANCED_STATIC members
+#define XTENSOR_CONSTEXPR_ENHANCED_STATIC
+#define XTENSOR_CONSTEXPR_RETURN inline
+#else
+#define XTENSOR_CONSTEXPR_ENHANCED constexpr
+#define XTENSOR_CONSTEXPR_RETURN constexpr
+#define XTENSOR_CONSTEXPR_ENHANCED_STATIC constexpr static
+#define XTENSOR_HAS_CONSTEXPR_ENHANCED
+#endif
+
+// Default element storage: an uninitialized-growth vector type.
+#ifndef XTENSOR_DEFAULT_DATA_CONTAINER
+#define XTENSOR_DEFAULT_DATA_CONTAINER(T, A) uvector<T, A>
+#endif
+
+// Default shape storage: a small-buffer vector with 4 inline slots.
+#ifndef XTENSOR_DEFAULT_SHAPE_CONTAINER
+#define XTENSOR_DEFAULT_SHAPE_CONTAINER(T, EA, SA) \
+    xt::svector<typename XTENSOR_DEFAULT_DATA_CONTAINER(T, EA)::size_type, 4, SA, true>
+#endif
+
+// NOTE(review): this (re)defines XSIMD_DEFAULT_ALIGNMENT unconditionally when
+// xsimd is enabled — presumably to normalize across xsimd versions; confirm
+// against the bundled xsimd release.
+#ifdef XTENSOR_USE_XSIMD
+#include <xsimd/xsimd.hpp>
+#define XSIMD_DEFAULT_ALIGNMENT xsimd::default_arch::alignment()
+#endif
+
+
+// Allocator selection: optionally wrapped in a tracking allocator, and
+// aligned when xsimd is in use.
+#ifndef XTENSOR_DEFAULT_ALLOCATOR
+#ifdef XTENSOR_ALLOC_TRACKING
+#ifndef XTENSOR_ALLOC_TRACKING_POLICY
+#define XTENSOR_ALLOC_TRACKING_POLICY xt::alloc_tracking::policy::print
+#endif
+#ifdef XTENSOR_USE_XSIMD
+#include <xsimd/xsimd.hpp>
+#define XTENSOR_DEFAULT_ALLOCATOR(T) \
+    xt::tracking_allocator<T, xsimd::aligned_allocator<T, XSIMD_DEFAULT_ALIGNMENT>, XTENSOR_ALLOC_TRACKING_POLICY>
+#else
+#define XTENSOR_DEFAULT_ALLOCATOR(T) \
+    xt::tracking_allocator<T, std::allocator<T>, XTENSOR_ALLOC_TRACKING_POLICY>
+#endif
+#else
+#ifdef XTENSOR_USE_XSIMD
+
+#define XTENSOR_DEFAULT_ALLOCATOR(T) xsimd::aligned_allocator<T, XTENSOR_DEFAULT_ALIGNMENT>
+#else
+#define XTENSOR_DEFAULT_ALLOCATOR(T) std::allocator<T>
+#endif
+#endif
+#endif
+
+// 0 means "no over-alignment requested"; see XTENSOR_SELECT_ALIGN below.
+#ifndef XTENSOR_DEFAULT_ALIGNMENT
+#ifdef XTENSOR_USE_XSIMD
+#define XTENSOR_DEFAULT_ALIGNMENT XSIMD_DEFAULT_ALIGNMENT
+#else
+#define XTENSOR_DEFAULT_ALIGNMENT 0
+#endif
+#endif
+
+#ifndef XTENSOR_DEFAULT_LAYOUT
+#define XTENSOR_DEFAULT_LAYOUT ::xt::layout_type::row_major
+#endif
+
+#ifndef XTENSOR_DEFAULT_TRAVERSAL
+#define XTENSOR_DEFAULT_TRAVERSAL ::xt::layout_type::row_major
+#endif
+
+// NOTE(review): "TRESHOLD" is misspelled, but it is the established public
+// macro name; renaming would break existing user configuration.
+#ifndef XTENSOR_OPENMP_TRESHOLD
+#define XTENSOR_OPENMP_TRESHOLD 0
+#endif
+
+#ifndef XTENSOR_TBB_THRESHOLD
+#define XTENSOR_TBB_THRESHOLD 0
+#endif
+
+// Falls back to the type's natural alignment when no over-alignment is set.
+#ifndef XTENSOR_SELECT_ALIGN
+#define XTENSOR_SELECT_ALIGN(T) (XTENSOR_DEFAULT_ALIGNMENT != 0 ? XTENSOR_DEFAULT_ALIGNMENT : alignof(T))
+#endif
+
+#ifndef XTENSOR_FIXED_ALIGN
+#define XTENSOR_FIXED_ALIGN XTENSOR_SELECT_ALIGN(void*)
+#endif
+
+// Stubs parsed only by Doxygen so XTL_REQUIRES shows up in the docs.
+// NOTE(review): `constexpr bool value` is a non-static constexpr data member,
+// which is ill-formed C++; harmless here because this block is never
+// compiled, but `static constexpr` would be more accurate.
+#ifdef IN_DOXYGEN
+namespace xtl
+{
+    template <class... T>
+    struct conjunction
+    {
+        constexpr bool value = true;
+    };
+
+    template <class... C>
+    using check_concept = std::enable_if_t<conjunction<C...>::value, int>;
+
+#define XTL_REQUIRES(...) xtl::check_concept<__VA_ARGS__> = 0
+}
+#endif
+
+#endif

+ 209 - 0
3rd/numpy/include/xtensor/xtensor_forward.hpp

@@ -0,0 +1,209 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_FORWARD_HPP
+#define XTENSOR_FORWARD_HPP
+
+// This file contains forward declarations and
+// alias types to solve the problem of circular
+// includes. It should not contain anything else
+// and should not bring additional dependencies to
+// the files that include it. So:
+// - do NOT define classes of metafunctions here
+// - do NOT include other headers
+//
+// If you need to do so, something is probably
+// going wrong (either your change, or xtensor
+// needs to be refactored).
+
+#include <memory>
+#include <vector>
+
+#include <xtl/xoptional_sequence.hpp>
+
+#include "xlayout.hpp"
+#include "xtensor_config.hpp"
+
namespace xt
{
    // Tag types used as the Tag template parameter of the containers below:
    // plain expressions vs. expressions with missing values (xoptional).
    struct xtensor_expression_tag;
    struct xoptional_expression_tag;

    template <class C>
    struct xcontainer_inner_types;

    template <class D>
    class xcontainer;

    // Internal sequence containers, declared here to avoid circular includes.
    template <class T, class A>
    class uvector;

    template <class T, std::size_t N, class A, bool Init>
    class svector;

    template <
        class EC,
        layout_type L = XTENSOR_DEFAULT_LAYOUT,
        class SC = XTENSOR_DEFAULT_SHAPE_CONTAINER(typename EC::value_type, typename EC::allocator_type, std::allocator<typename EC::size_type>),
        class Tag = xtensor_expression_tag>
    class xarray_container;

    /**
     * @typedef xarray
     * Alias template on xarray_container with default parameters for data container
     * type and shape / strides container type. This allows to write
     *
     * @code{.cpp}
     * xt::xarray<double> a = {{1., 2.}, {3., 4.}};
     * @endcode
     *
     * instead of the heavier syntax
     *
     * @code{.cpp}
     * xt::xarray_container<std::vector<double>, std::vector<std::size_t>> a = ...
     * @endcode
     *
     * @tparam T The value type of the elements.
     * @tparam L The layout_type of the xarray_container (default: XTENSOR_DEFAULT_LAYOUT).
     * @tparam A The allocator of the container holding the elements.
     * @tparam SA The allocator of the containers holding the shape and the strides.
     */
    template <
        class T,
        layout_type L = XTENSOR_DEFAULT_LAYOUT,
        class A = XTENSOR_DEFAULT_ALLOCATOR(T),
        class SA = std::allocator<typename std::vector<T, A>::size_type>>
    using xarray = xarray_container<XTENSOR_DEFAULT_DATA_CONTAINER(T, A), L, XTENSOR_DEFAULT_SHAPE_CONTAINER(T, A, SA)>;

    template <
        class EC,
        layout_type L = XTENSOR_DEFAULT_LAYOUT,
        class SC = XTENSOR_DEFAULT_SHAPE_CONTAINER(typename EC::value_type, std::allocator<typename EC::size_type>, std::allocator<typename EC::size_type>),
        class Tag = xtensor_expression_tag>
    class xarray_adaptor;

    /**
     * @typedef xarray_optional
     * Alias template on xarray_container for handling missing values
     *
     * @tparam T The value type of the elements.
     * @tparam L The layout_type of the container (default: XTENSOR_DEFAULT_LAYOUT).
     * @tparam A The allocator of the container holding the elements.
     * @tparam BA The allocator of the container holding the missing flags.
     * @tparam SA The allocator of the containers holding the shape and the strides.
     */
    template <
        class T,
        layout_type L = XTENSOR_DEFAULT_LAYOUT,
        class A = XTENSOR_DEFAULT_ALLOCATOR(T),
        class BC = xtl::xdynamic_bitset<std::size_t>,
        class SA = std::allocator<typename std::vector<T, A>::size_type>>
    using xarray_optional = xarray_container<
        xtl::xoptional_vector<T, A, BC>,
        L,
        XTENSOR_DEFAULT_SHAPE_CONTAINER(T, A, SA),
        xoptional_expression_tag>;

    template <class EC, std::size_t N, layout_type L = XTENSOR_DEFAULT_LAYOUT, class Tag = xtensor_expression_tag>
    class xtensor_container;

    /**
     * @typedef xtensor
     * Alias template on xtensor_container with default parameters for data container
     * type. This allows to write
     *
     * @code{.cpp}
     * xt::xtensor<double, 2> a = {{1., 2.}, {3., 4.}};
     * @endcode
     *
     * instead of the heavier syntax
     *
     * @code{.cpp}
     * xt::xtensor_container<std::vector<double>, 2> a = ...
     * @endcode
     *
     * @tparam T The value type of the elements.
     * @tparam N The dimension of the tensor.
     * @tparam L The layout_type of the tensor (default: XTENSOR_DEFAULT_LAYOUT).
     * @tparam A The allocator of the containers holding the elements.
     */
    template <class T, std::size_t N, layout_type L = XTENSOR_DEFAULT_LAYOUT, class A = XTENSOR_DEFAULT_ALLOCATOR(T)>
    using xtensor = xtensor_container<XTENSOR_DEFAULT_DATA_CONTAINER(T, A), N, L>;

    template <class EC, std::size_t N, layout_type L = XTENSOR_DEFAULT_LAYOUT, class Tag = xtensor_expression_tag>
    class xtensor_adaptor;

    template <class EC, std::size_t N, layout_type L = XTENSOR_DEFAULT_LAYOUT, class Tag = xtensor_expression_tag>
    class xtensor_view;

    template <std::size_t... N>
    class fixed_shape;

    /**
     * @typedef xshape
     * Alias template for ``fixed_shape`` allows for a shorter template shape definition in ``xtensor_fixed``.
     */
    template <std::size_t... N>
    using xshape = fixed_shape<N...>;

    template <class ET, class S, layout_type L = XTENSOR_DEFAULT_LAYOUT, bool Sharable = true, class Tag = xtensor_expression_tag>
    class xfixed_container;

    template <class ET, class S, layout_type L = XTENSOR_DEFAULT_LAYOUT, bool Sharable = true, class Tag = xtensor_expression_tag>
    class xfixed_adaptor;

    /**
     * @typedef xtensor_fixed
     * Alias template on xfixed_container with default parameters for layout
     * type. This allows to write
     *
     * @code{.cpp}
     * xt::xtensor_fixed<double, xt::xshape<2, 2>> a = {{1., 2.}, {3., 4.}};
     * @endcode
     *
     * instead of the syntax
     *
     * @code{.cpp}
     * xt::xfixed_container<double, xt::xshape<2, 2>, xt::layout_type::row_major> a = ...
     * @endcode
     *
     * @tparam T The value type of the elements.
     * @tparam FSH A xshape template shape.
     * @tparam L The layout_type of the tensor (default: XTENSOR_DEFAULT_LAYOUT).
     * @tparam Sharable Whether the tensor can be used in a shared expression.
     */
    template <class T, class FSH, layout_type L = XTENSOR_DEFAULT_LAYOUT, bool Sharable = true>
    using xtensor_fixed = xfixed_container<T, FSH, L, Sharable>;

    /**
     * @typedef xtensor_optional
     * Alias template on xtensor_container for handling missing values
     *
     * @tparam T The value type of the elements.
     * @tparam N The dimension of the tensor.
     * @tparam L The layout_type of the container (default: XTENSOR_DEFAULT_LAYOUT).
     * @tparam A The allocator of the containers holding the elements.
     * @tparam BA The allocator of the container holding the missing flags.
     */
    template <
        class T,
        std::size_t N,
        layout_type L = XTENSOR_DEFAULT_LAYOUT,
        class A = XTENSOR_DEFAULT_ALLOCATOR(T),
        class BC = xtl::xdynamic_bitset<std::size_t>>
    using xtensor_optional = xtensor_container<xtl::xoptional_vector<T, A, BC>, N, L, xoptional_expression_tag>;

    template <class CT, class... S>
    class xview;

    template <class F, class... CT>
    class xfunction;
}
+
+#endif

+ 333 - 0
3rd/numpy/include/xtensor/xtensor_simd.hpp

@@ -0,0 +1,333 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_SIMD_HPP
+#define XTENSOR_SIMD_HPP
+
+#include <vector>
+
+#include <xtl/xdynamic_bitset.hpp>
+
+#include "xutils.hpp"
+
+#ifdef XTENSOR_USE_XSIMD
+
+#include <xsimd/xsimd.hpp>
+// #include <xsimd/memory/xsimd_load_store.hpp>
+
// Workaround for Visual Studio versions before VS2017 (_MSC_VER 1910),
// which need a global isnan overload for xsimd batches.
// Fix: the original tested the non-existent macro `_MSV_VER` (a typo for
// `_MSC_VER`), so this shim was never compiled on any compiler.
#if defined(_MSC_VER) && (_MSC_VER < 1910)
template <class T, class A>
inline xsimd::batch_bool<T, A> isnan(const xsimd::batch<T, A>& b)
{
    return xsimd::isnan(b);
}
#endif
+
// Thin indirection layer over xsimd: the rest of xtensor only uses the
// xt_simd namespace, so the scalar fallback below (compiled when
// XTENSOR_USE_XSIMD is not defined) can provide the same interface.
namespace xt_simd
{
    template <class T, std::size_t A>
    using aligned_allocator = xsimd::aligned_allocator<T, A>;

    using aligned_mode = xsimd::aligned_mode;
    using unaligned_mode = xsimd::unaligned_mode;

    template <class A>
    using allocator_alignment = xsimd::allocator_alignment<A>;

    template <class A>
    using allocator_alignment_t = xsimd::allocator_alignment_t<A>;

    template <class C>
    using container_alignment = xsimd::container_alignment<C>;

    template <class C>
    using container_alignment_t = xsimd::container_alignment_t<C>;

    template <class T>
    using simd_traits = xsimd::simd_traits<T>;

    template <class T>
    using revert_simd_traits = xsimd::revert_simd_traits<T>;

    template <class T>
    using simd_type = xsimd::simd_type<T>;

    template <class T>
    using simd_bool_type = xsimd::simd_bool_type<T>;

    template <class T>
    using revert_simd_type = xsimd::revert_simd_type<T>;

    template <class T1, class T2>
    using simd_return_type = xsimd::simd_return_type<T1, T2>;

    using xsimd::broadcast_as;
    using xsimd::get_alignment_offset;
    using xsimd::load_as;
    using xsimd::select;
    using xsimd::store_as;

    template <class V>
    using is_batch_bool = xsimd::is_batch_bool<V>;

    template <class V>
    using is_batch_complex = xsimd::is_batch_complex<V>;

    // NOTE(review): reaches into xsimd::detail — presumably stable across the
    // vendored xsimd version, but verify when upgrading xsimd.
    template <class T1, class T2>
    using simd_condition = xsimd::detail::simd_condition<T1, T2>;
}
+
+#else  // XTENSOR_USE_XSIMD
+
// Scalar fallback of the xt_simd interface, used when XTENSOR_USE_XSIMD is
// not defined: a "batch" is a single scalar (simd_traits<T>::size == 1),
// loads/stores are plain dereferences and select is a ternary. This keeps
// the rest of xtensor agnostic of whether SIMD is available.
namespace xt_simd
{
    template <class T, std::size_t A>
    class aligned_allocator;

    struct aligned_mode
    {
    };

    struct unaligned_mode
    {
    };

    template <class A>
    struct allocator_alignment
    {
        using type = unaligned_mode;
    };

    template <class A>
    using allocator_alignment_t = typename allocator_alignment<A>::type;

    template <class C>
    struct container_alignment
    {
        using type = unaligned_mode;
    };

    template <class C>
    using container_alignment_t = typename container_alignment<C>::type;

    // In scalar mode a "SIMD batch" of T is just T itself.
    template <class T>
    struct simd_traits
    {
        using type = T;
        using bool_type = bool;
        using batch_bool = bool;
        static constexpr std::size_t size = 1;
    };

    template <class T>
    struct revert_simd_traits
    {
        using type = T;
        static constexpr std::size_t size = simd_traits<type>::size;
    };

    template <class T>
    using simd_type = typename simd_traits<T>::type;

    template <class T>
    using simd_bool_type = typename simd_traits<T>::bool_type;

    template <class T>
    using revert_simd_type = typename revert_simd_traits<T>::type;

    template <class R, class T>
    inline simd_type<R> broadcast_as(const T& value)
    {
        return value;
    }

    // Alignment is irrelevant for scalar loads/stores; both overloads are
    // simple dereferences.
    template <class R, class T>
    inline simd_type<R> load_as(const T* src, aligned_mode)
    {
        return *src;
    }

    template <class R, class T>
    inline simd_type<R> load_as(const T* src, unaligned_mode)
    {
        return *src;
    }

    template <class R, class T>
    inline void store_as(R* dst, const simd_type<T>& src, aligned_mode)
    {
        *dst = src;
    }

    template <class R, class T>
    inline void store_as(R* dst, const simd_type<T>& src, unaligned_mode)
    {
        *dst = src;
    }

    template <class T>
    inline T select(bool cond, const T& t1, const T& t2)
    {
        return cond ? t1 : t2;
    }

    // With a batch size of 1 every element is "aligned"; returning size means
    // the whole range is treated as the scalar prologue.
    template <class T>
    inline std::size_t get_alignment_offset(const T* /*p*/, std::size_t size, std::size_t /*block_size*/)
    {
        return size;
    }

    template <class T1, class T2>
    using simd_return_type = simd_type<T2>;

    template <class V>
    struct is_batch_bool : std::false_type
    {
    };

    template <class V>
    struct is_batch_complex : std::false_type
    {
    };

    // Scalar mode accepts any type pairing.
    template <class T1, class T2>
    struct simd_condition : std::true_type
    {
    };
}
+
+#endif  // XTENSOR_USE_XSIMD
+
namespace xt
{
    using xt_simd::aligned_mode;
    using xt_simd::unaligned_mode;

    // Marker: alignment is determined by the expression's own storage.
    struct inner_aligned_mode
    {
    };

    namespace detail
    {
        // Combine two alignment requirements: equal modes are kept,
        // differing modes degrade to unaligned; inner_aligned_mode defers
        // to the other operand.
        template <class A1, class A2>
        struct driven_align_mode_impl
        {
            using type = std::conditional_t<std::is_same<A1, A2>::value, A1, ::xt_simd::unaligned_mode>;
        };

        template <class A>
        struct driven_align_mode_impl<inner_aligned_mode, A>
        {
            using type = A;
        };
    }

    template <class A1, class A2>
    struct driven_align_mode
    {
        using type = typename detail::driven_align_mode_impl<A1, A2>::type;
    };

    template <class A1, class A2>
    using driven_align_mode_t = typename detail::driven_align_mode_impl<A1, A2>::type;

    namespace detail
    {
        // SFINAE detection of a member template E::load_simd<align, T>(size).
        template <class E, class T, class = void>
        struct has_load_simd : std::false_type
        {
        };

        template <class E, class T>
        struct has_load_simd<
            E,
            T,
            void_t<decltype(std::declval<E>().template load_simd<aligned_mode, T>(typename E::size_type(0)))>>
            : std::true_type
        {
        };

        // An expression has a SIMD interface for T only if the value types
        // are SIMD-compatible (simd_condition) AND load_simd is present.
        template <class E, class T, bool B = xt_simd::simd_condition<typename E::value_type, T>::value>
        struct has_simd_interface_impl : has_load_simd<E, T>
        {
        };

        template <class E, class T>
        struct has_simd_interface_impl<E, T, false> : std::false_type
        {
        };
    }

    template <class E, class T = typename std::decay_t<E>::value_type>
    struct has_simd_interface : detail::has_simd_interface_impl<E, T>
    {
    };

    // True when T has a dedicated batch type (i.e. simd_type<T> != T).
    template <class T>
    struct has_simd_type : std::integral_constant<bool, !std::is_same<T, xt_simd::simd_type<T>>::value>
    {
    };

    namespace detail
    {
        // SFINAE detection of a functor's member template F::simd_apply<B>.
        template <class F, class B, class = void>
        struct has_simd_apply_impl : std::false_type
        {
        };

        template <class F, class B>
        struct has_simd_apply_impl<F, B, void_t<decltype(&F::template simd_apply<B>)>> : std::true_type
        {
        };
    }

    template <class F, class B>
    struct has_simd_apply : detail::has_simd_apply_impl<F, B>
    {
    };

    // bool elements are loaded as uint8_t for SIMD purposes.
    template <class T>
    using bool_load_type = std::conditional_t<std::is_same<T, bool>::value, uint8_t, T>;

    // Bit-packed containers expose proxy references, so the SIMD load/store
    // path must be disabled for them.
    template <class T>
    struct forbid_simd : std::false_type
    {
    };

    template <class A>
    struct forbid_simd<std::vector<bool, A>> : std::true_type
    {
    };

    template <class A>
    struct forbid_simd<const std::vector<bool, A>> : std::true_type
    {
    };

    template <class B, class A>
    struct forbid_simd<xtl::xdynamic_bitset<B, A>> : std::true_type
    {
    };

    template <class B, class A>
    struct forbid_simd<const xtl::xdynamic_bitset<B, A>> : std::true_type
    {
    };

    // enable_if-style gate: yields a simd_return_type only for containers
    // that are not forbidden above.
    template <class C, class T1, class T2>
    struct container_simd_return_type
        : std::enable_if<!forbid_simd<C>::value, xt_simd::simd_return_type<T1, bool_load_type<T2>>>
    {
    };

    template <class C, class T1, class T2>
    using container_simd_return_type_t = typename container_simd_return_type<C, T1, T2>::type;
}
+
+#endif

+ 1134 - 0
3rd/numpy/include/xtensor/xutils.hpp

@@ -0,0 +1,1134 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_UTILS_HPP
+#define XTENSOR_UTILS_HPP
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <complex>
+#include <cstddef>
+#include <initializer_list>
+#include <iostream>
+#include <memory>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <xtl/xfunctional.hpp>
+#include <xtl/xmeta_utils.hpp>
+#include <xtl/xsequence.hpp>
+#include <xtl/xtype_traits.hpp>
+
+#include "xtensor_config.hpp"
+
// NOEXCEPT(T) wraps expression-dependent noexcept specifications.
// On MSVC >= VS2017 (19.10) it expands to nothing — presumably a workaround
// for compiler issues with such specifications (TODO confirm); everywhere
// else (including non-MSVC compilers, where _MSC_VER evaluates to 0 in the
// #if) it expands to noexcept(T).
#if (_MSC_VER >= 1910)
#define NOEXCEPT(T)
#else
#define NOEXCEPT(T) noexcept(T)
#endif
+
+namespace xt
+{
    /****************
     * declarations *
     ****************/

    template <class T>
    struct remove_class;

    /*template <class F, class... T>
    void for_each(F&& f, std::tuple<T...>& t) noexcept(implementation_dependent);*/

    /*template <class F, class R, class... T>
    R accumulate(F&& f, R init, const std::tuple<T...>& t) noexcept(implementation_dependent);*/

    template <std::size_t I, class... Args>
    constexpr decltype(auto) argument(Args&&... args) noexcept;

    template <class R, class F, class... S>
    R apply(std::size_t index, F&& func, const std::tuple<S...>& s) NOEXCEPT(noexcept(func(std::get<0>(s))));

    template <class T, class S>
    void nested_copy(T&& iter, const S& s);

    template <class T, class S>
    void nested_copy(T&& iter, std::initializer_list<S> s);

    template <class C>
    bool resize_container(C& c, typename C::size_type size);

    template <class T, std::size_t N>
    bool resize_container(std::array<T, N>& a, typename std::array<T, N>::size_type size);

    template <std::size_t... I>
    class fixed_shape;

    template <std::size_t... I>
    bool resize_container(fixed_shape<I...>& a, std::size_t size);

    template <class X, class C>
    struct rebind_container;

    template <class X, class C>
    using rebind_container_t = typename rebind_container<X, C>::type;

    std::size_t normalize_axis(std::size_t dim, std::ptrdiff_t axis);

    // gcc 4.9 is affected by C++14 defect CGW 1558
    // see http://open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#1558
    template <class... T>
    struct make_void
    {
        using type = void;
    };

    template <class... T>
    using void_t = typename make_void<T...>::type;

    // This is used for non existent types (e.g. storage for some expressions
    // like generators)
    struct invalid_type
    {
    };

    template <class... T>
    struct make_invalid_type
    {
        using type = invalid_type;
    };

    // R when T is not an integral type; used to remove overloads from
    // resolution for integral arguments.
    template <class T, class R>
    using disable_integral_t = std::enable_if_t<!xtl::is_integral<T>::value, R>;

    /********************************
     * meta identity implementation *
     ********************************/

    template <class T>
    struct meta_identity
    {
        using type = T;
    };

    /***************************************
     * is_specialization_of implementation *
     ***************************************/

    // True iff T is an instantiation of the class template TT.
    template <template <class...> class TT, class T>
    struct is_specialization_of : std::false_type
    {
    };

    template <template <class...> class TT, class... Ts>
    struct is_specialization_of<TT, TT<Ts...>> : std::true_type
    {
    };

    /*******************************
     * remove_class implementation *
     *******************************/

    // Strips the class from a pointer-to-member-function type, yielding the
    // plain function type R(Args...). Undefined for other types.
    template <class T>
    struct remove_class
    {
    };

    template <class C, class R, class... Args>
    struct remove_class<R (C::*)(Args...)>
    {
        typedef R type(Args...);
    };

    template <class C, class R, class... Args>
    struct remove_class<R (C::*)(Args...) const>
    {
        typedef R type(Args...);
    };

    template <class T>
    using remove_class_t = typename remove_class<T>::type;
+
    /***************************
     * for_each implementation *
     ***************************/

    // Applies f to each element of a tuple, in order, via index recursion.
    // Note: the "enable_if < I<sizeof...(T)" spacing below is a formatter
    // artifact; it parses as enable_if<(I < sizeof...(T)), void>.
    namespace detail
    {
        template <std::size_t I, class F, class... T>
        inline typename std::enable_if<I == sizeof...(T), void>::type
        for_each_impl(F&& /*f*/, std::tuple<T...>& /*t*/) noexcept
        {
        }

        template <std::size_t I, class F, class... T>
            inline typename std::enable_if < I<sizeof...(T), void>::type
            for_each_impl(F&& f, std::tuple<T...>& t) noexcept(noexcept(f(std::get<I>(t))))
        {
            f(std::get<I>(t));
            for_each_impl<I + 1, F, T...>(std::forward<F>(f), t);
        }
    }

    template <class F, class... T>
    inline void for_each(F&& f, std::tuple<T...>& t) noexcept(
        noexcept(detail::for_each_impl<0, F, T...>(std::forward<F>(f), t))
    )
    {
        detail::for_each_impl<0, F, T...>(std::forward<F>(f), t);
    }

    // const-tuple overload of the same recursion.
    namespace detail
    {
        template <std::size_t I, class F, class... T>
        inline typename std::enable_if<I == sizeof...(T), void>::type
        for_each_impl(F&& /*f*/, const std::tuple<T...>& /*t*/) noexcept
        {
        }

        template <std::size_t I, class F, class... T>
            inline typename std::enable_if < I<sizeof...(T), void>::type
            for_each_impl(F&& f, const std::tuple<T...>& t) noexcept(noexcept(f(std::get<I>(t))))
        {
            f(std::get<I>(t));
            for_each_impl<I + 1, F, T...>(std::forward<F>(f), t);
        }
    }

    template <class F, class... T>
    inline void for_each(F&& f, const std::tuple<T...>& t) noexcept(
        noexcept(detail::for_each_impl<0, F, T...>(std::forward<F>(f), t))
    )
    {
        detail::for_each_impl<0, F, T...>(std::forward<F>(f), t);
    }

    /*****************************
     * accumulate implementation *
     *****************************/

    /// @cond DOXYGEN_INCLUDE_NOEXCEPT

    // Left fold over a tuple: init is combined with each element in order
    // via f, recursing on the element index.
    namespace detail
    {
        template <std::size_t I, class F, class R, class... T>
        inline std::enable_if_t<I == sizeof...(T), R>
        accumulate_impl(F&& /*f*/, R init, const std::tuple<T...>& /*t*/) noexcept
        {
            return init;
        }

        template <std::size_t I, class F, class R, class... T>
            inline std::enable_if_t < I<sizeof...(T), R>
            accumulate_impl(F&& f, R init, const std::tuple<T...>& t) noexcept(noexcept(f(init, std::get<I>(t))))
        {
            R res = f(init, std::get<I>(t));
            return accumulate_impl<I + 1, F, R, T...>(std::forward<F>(f), res, t);
        }
    }

    template <class F, class R, class... T>
    inline R accumulate(F&& f, R init, const std::tuple<T...>& t) noexcept(
        noexcept(detail::accumulate_impl<0, F, R, T...>(std::forward<F>(f), init, t))
    )
    {
        return detail::accumulate_impl<0, F, R, T...>(std::forward<F>(f), init, t);
    }

    /// @endcond
+
    /***************************
     * argument implementation *
     ***************************/

    // Returns the I-th element of a parameter pack, preserving value category.
    namespace detail
    {
        template <std::size_t I>
        struct getter
        {
            template <class Arg, class... Args>
            static constexpr decltype(auto) get(Arg&& /*arg*/, Args&&... args) noexcept
            {
                return getter<I - 1>::get(std::forward<Args>(args)...);
            }
        };

        template <>
        struct getter<0>
        {
            template <class Arg, class... Args>
            static constexpr Arg&& get(Arg&& arg, Args&&... /*args*/) noexcept
            {
                return std::forward<Arg>(arg);
            }
        };
    }

    template <std::size_t I, class... Args>
    constexpr decltype(auto) argument(Args&&... args) noexcept
    {
        static_assert(I < sizeof...(Args), "I should be lesser than sizeof...(Args)");
        return detail::getter<I>::get(std::forward<Args>(args)...);
    }

    /************************
     * apply implementation *
     ************************/

    // Calls func on the tuple element selected by a *runtime* index: a static
    // jump table holds one function pointer per element, indexed directly.
    // All results are converted to the common return type R.
    namespace detail
    {
        template <class R, class F, std::size_t I, class... S>
        R apply_one(F&& func, const std::tuple<S...>& s) NOEXCEPT(noexcept(func(std::get<I>(s))))
        {
            return static_cast<R>(func(std::get<I>(s)));
        }

        template <class R, class F, std::size_t... I, class... S>
        R apply(std::size_t index, F&& func, std::index_sequence<I...> /*seq*/, const std::tuple<S...>& s)
            NOEXCEPT(noexcept(func(std::get<0>(s))))
        {
            using FT = std::add_pointer_t<R(F&&, const std::tuple<S...>&)>;
            static const std::array<FT, sizeof...(I)> ar = {{&apply_one<R, F, I, S...>...}};
            return ar[index](std::forward<F>(func), s);
        }
    }

    template <class R, class F, class... S>
    inline R apply(std::size_t index, F&& func, const std::tuple<S...>& s)
        NOEXCEPT(noexcept(func(std::get<0>(s))))
    {
        return detail::apply<R>(index, std::forward<F>(func), std::make_index_sequence<sizeof...(S)>(), s);
    }

    /***************************
     * nested_initializer_list *
     ***************************/

    // nested_initializer_list_t<T, I> is initializer_list nested I levels
    // deep around T (I == 0 yields T itself).
    template <class T, std::size_t I>
    struct nested_initializer_list
    {
        using type = std::initializer_list<typename nested_initializer_list<T, I - 1>::type>;
    };

    template <class T>
    struct nested_initializer_list<T, 0>
    {
        using type = T;
    };

    template <class T, std::size_t I>
    using nested_initializer_list_t = typename nested_initializer_list<T, I>::type;
+
+    /******************************
+     * nested_copy implementation *
+     ******************************/
+
+    template <class T, class S>
+    inline void nested_copy(T&& iter, const S& s)
+    {
+        *iter++ = s;
+    }
+
+    template <class T, class S>
+    inline void nested_copy(T&& iter, std::initializer_list<S> s)
+    {
+        for (auto it = s.begin(); it != s.end(); ++it)
+        {
+            nested_copy(std::forward<T>(iter), *it);
+        }
+    }
+
    /***********************************
     * resize_container implementation *
     ***********************************/
    // Resizes c when possible and reports whether the container now has the
    // requested size. Fixed-size containers (std::array, fixed_shape) cannot
    // be resized, so they only report whether the request matches their
    // static size.
    template <class C>
    inline bool resize_container(C& c, typename C::size_type size)
    {
        c.resize(size);
        return true;
    }

    template <class T, std::size_t N>
    inline bool resize_container(std::array<T, N>& /*a*/, typename std::array<T, N>::size_type size)
    {
        return size == N;
    }

    template <std::size_t... I>
    inline bool resize_container(xt::fixed_shape<I...>&, std::size_t size)
    {
        return sizeof...(I) == size;
    }
+
+    /*********************************
+     * normalize_axis implementation *
+     *********************************/
+
+    // scalar normalize axis
+    inline std::size_t normalize_axis(std::size_t dim, std::ptrdiff_t axis)
+    {
+        return axis < 0 ? static_cast<std::size_t>(static_cast<std::ptrdiff_t>(dim) + axis)
+                        : static_cast<std::size_t>(axis);
+    }
+
    // Container overload for signed axis containers: returns a new container
    // of the same shape, rebound to std::size_t, with every axis normalized
    // against expr.dimension(). Bounds are checked only via XTENSOR_ASSERT.
    template <class E, class C>
    inline std::enable_if_t<
        !xtl::is_integral<std::decay_t<C>>::value && xtl::is_signed<typename std::decay_t<C>::value_type>::value,
        rebind_container_t<std::size_t, std::decay_t<C>>>
    normalize_axis(E& expr, C&& axes)
    {
        rebind_container_t<std::size_t, std::decay_t<C>> res;
        resize_container(res, axes.size());

        for (std::size_t i = 0; i < axes.size(); ++i)
        {
            res[i] = normalize_axis(expr.dimension(), axes[i]);
        }

        XTENSOR_ASSERT(std::all_of(
            res.begin(),
            res.end(),
            [&expr](auto ax_el)
            {
                return ax_el < expr.dimension();
            }
        ));

        return res;
    }

    // Container overload for unsigned axis containers: nothing to normalize,
    // so the container is forwarded through unchanged (debug-asserted only).
    template <class C, class E>
    inline std::enable_if_t<
        !xtl::is_integral<std::decay_t<C>>::value && std::is_unsigned<typename std::decay_t<C>::value_type>::value,
        C&&>
    normalize_axis(E& expr, C&& axes)
    {
        static_cast<void>(expr);
        XTENSOR_ASSERT(std::all_of(
            axes.begin(),
            axes.end(),
            [&expr](auto ax_el)
            {
                return ax_el < expr.dimension();
            }
        ));
        return std::forward<C>(axes);
    }

    // forward_normalize<R>: like normalize_axis but with an explicit result
    // container type R. Three cases: signed input (normalize into a fresh R),
    // unsigned input of a different type (element-wise copy into R), and
    // unsigned input already of type R (moved through untouched).
    template <class R, class E, class C>
    inline auto forward_normalize(E& expr, C&& axes)
        -> std::enable_if_t<xtl::is_signed<std::decay_t<decltype(*std::begin(axes))>>::value, R>
    {
        R res;
        xt::resize_container(res, xtl::sequence_size(axes));
        auto dim = expr.dimension();
        std::transform(
            std::begin(axes),
            std::end(axes),
            std::begin(res),
            [&dim](auto ax_el)
            {
                return normalize_axis(dim, ax_el);
            }
        );

        XTENSOR_ASSERT(std::all_of(
            res.begin(),
            res.end(),
            [&expr](auto ax_el)
            {
                return ax_el < expr.dimension();
            }
        ));

        return res;
    }

    template <class R, class E, class C>
    inline auto forward_normalize(E& expr, C&& axes) -> std::enable_if_t<
        !xtl::is_signed<std::decay_t<decltype(*std::begin(axes))>>::value && !std::is_same<R, std::decay_t<C>>::value,
        R>
    {
        static_cast<void>(expr);

        R res;
        xt::resize_container(res, xtl::sequence_size(axes));
        std::copy(std::begin(axes), std::end(axes), std::begin(res));
        XTENSOR_ASSERT(std::all_of(
            res.begin(),
            res.end(),
            [&expr](auto ax_el)
            {
                return ax_el < expr.dimension();
            }
        ));
        return res;
    }

    template <class R, class E, class C>
    inline auto forward_normalize(E& expr, C&& axes) -> std::enable_if_t<
        !xtl::is_signed<std::decay_t<decltype(*std::begin(axes))>>::value && std::is_same<R, std::decay_t<C>>::value,
        R&&>
    {
        static_cast<void>(expr);
        XTENSOR_ASSERT(std::all_of(
            std::begin(axes),
            std::end(axes),
            [&expr](auto ax_el)
            {
                return ax_el < expr.dimension();
            }
        ));
        return std::move(axes);
    }
+
    /******************
     * get_value_type *
     ******************/

    // get_value_type_t<T> is T::value_type when it exists, otherwise T itself
    // (classic void_t detection idiom).
    template <class T, class = void_t<>>
    struct get_value_type
    {
        using type = T;
    };

    template <class T>
    struct get_value_type<T, void_t<typename T::value_type>>
    {
        using type = typename T::value_type;
    };

    template <class T>
    using get_value_type_t = typename get_value_type<T>::type;

    /**********************
     * get implementation *
     **********************/

    // When subclassing from std::tuple not all compilers are able to correctly instantiate get
    // See here: https://stackoverflow.com/a/37188019/2528668
    // The casts below slice the derived object down to its std::tuple base,
    // preserving the value category (&&, &, const&) of the argument.
    template <std::size_t I, template <typename... Args> class T, typename... Args>
    decltype(auto) get(T<Args...>&& v)
    {
        return std::get<I>(static_cast<std::tuple<Args...>&&>(v));
    }

    template <std::size_t I, template <typename... Args> class T, typename... Args>
    decltype(auto) get(T<Args...>& v)
    {
        return std::get<I>(static_cast<std::tuple<Args...>&>(v));
    }

    template <std::size_t I, template <typename... Args> class T, typename... Args>
    decltype(auto) get(const T<Args...>& v)
    {
        return std::get<I>(static_cast<const std::tuple<Args...>&>(v));
    }
+
+    /***************************
+     * apply_cv implementation *
+     ***************************/
+
+    namespace detail
+    {
+        template <
+            class T,
+            class U,
+            bool = std::is_const<std::remove_reference_t<T>>::value,
+            bool = std::is_volatile<std::remove_reference_t<T>>::value>
+        struct apply_cv_impl
+        {
+            using type = U;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T, U, true, false>
+        {
+            using type = const U;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T, U, false, true>
+        {
+            using type = volatile U;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T, U, true, true>
+        {
+            using type = const volatile U;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T&, U, false, false>
+        {
+            using type = U&;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T&, U, true, false>
+        {
+            using type = const U&;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T&, U, false, true>
+        {
+            using type = volatile U&;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T&, U, true, true>
+        {
+            using type = const volatile U&;
+        };
+    }
+
+    template <class T, class U>
+    struct apply_cv
+    {
+        using type = typename detail::apply_cv_impl<T, U>::type;
+    };
+
+    template <class T, class U>
+    using apply_cv_t = typename apply_cv<T, U>::type;
+
+    /**************************
+     * to_array implementation *
+     ***************************/
+
+    namespace detail
+    {
+        template <class T, std::size_t N, std::size_t... I>
+        constexpr std::array<std::remove_cv_t<T>, N> to_array_impl(T (&a)[N], std::index_sequence<I...>)
+        {
+            return {{a[I]...}};
+        }
+    }
+
+    template <class T, std::size_t N>
+    constexpr std::array<std::remove_cv_t<T>, N> to_array(T (&a)[N])
+    {
+        return detail::to_array_impl(a, std::make_index_sequence<N>{});
+    }
+
+    /********************************
+     * sequence_size implementation *
+     ********************************/
+
+    // equivalent to std::size(c) in c++17
+    template <class C>
+    constexpr auto sequence_size(const C& c) -> decltype(c.size())
+    {
+        return c.size();
+    }
+
+    // equivalent to std::size(a) in c++17
+    template <class T, std::size_t N>
+    constexpr std::size_t sequence_size(const T (&)[N])
+    {
+        return N;
+    }
+
+    /***********************************
+     * has_storage_type implementation *
+     ***********************************/
+
+    template <class T, class = void>
+    struct has_storage_type : std::false_type
+    {
+    };
+
+    template <class T>
+    struct xcontainer_inner_types;
+
+    template <class T>
+    struct has_storage_type<T, void_t<typename xcontainer_inner_types<T>::storage_type>>
+        : xtl::negation<
+              std::is_same<typename std::remove_cv<typename xcontainer_inner_types<T>::storage_type>::type, invalid_type>>
+    {
+    };
+
+    /*************************************
+     * has_data_interface implementation *
+     *************************************/
+
+    template <class E, class = void>
+    struct has_data_interface : std::false_type
+    {
+    };
+
+    template <class E>
+    struct has_data_interface<E, void_t<decltype(std::declval<E>().data())>> : std::true_type
+    {
+    };
+
+    template <class E, class = void>
+    struct has_strides : std::false_type
+    {
+    };
+
+    template <class E>
+    struct has_strides<E, void_t<decltype(std::declval<E>().strides())>> : std::true_type
+    {
+    };
+
+    template <class E, class = void>
+    struct has_iterator_interface : std::false_type
+    {
+    };
+
+    template <class E>
+    struct has_iterator_interface<E, void_t<decltype(std::declval<E>().begin())>> : std::true_type
+    {
+    };
+
+    /******************************
+     * is_iterator implementation *
+     ******************************/
+
+    template <class E, class = void>
+    struct is_iterator : std::false_type
+    {
+    };
+
+    template <class E>
+    struct is_iterator<
+        E,
+        void_t<
+            decltype(*std::declval<const E>(), std::declval<const E>() == std::declval<const E>(), std::declval<const E>() != std::declval<const E>(), ++(*std::declval<E*>()), (*std::declval<E*>())++, std::true_type())>>
+        : std::true_type
+    {
+    };
+
+    /********************************************
+     * xtrivial_default_construct implemenation *
+     ********************************************/
+
+#if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE >= 7
+// has_trivial_default_constructor has not been available since libstdc++-7.
+#define XTENSOR_GLIBCXX_USE_CXX11_ABI 1
+#else
+#if defined(_GLIBCXX_USE_CXX11_ABI)
+#if _GLIBCXX_USE_CXX11_ABI || (defined(_GLIBCXX_USE_DUAL_ABI) && !_GLIBCXX_USE_DUAL_ABI)
+#define XTENSOR_GLIBCXX_USE_CXX11_ABI 1
+#endif
+#endif
+#endif
+
+#if !defined(__GNUG__) || defined(_LIBCPP_VERSION) || defined(XTENSOR_GLIBCXX_USE_CXX11_ABI)
+
+    template <class T>
+    using xtrivially_default_constructible = std::is_trivially_default_constructible<T>;
+
+#else
+
+    template <class T>
+    using xtrivially_default_constructible = std::has_trivial_default_constructor<T>;
+
+#endif
+#undef XTENSOR_GLIBCXX_USE_CXX11_ABI
+
+    /*************************
+     * conditional type cast *
+     *************************/
+
+    template <bool condition, class T>
+    struct conditional_cast_functor;
+
+    template <class T>
+    struct conditional_cast_functor<false, T> : public xtl::identity
+    {
+    };
+
+    template <class T>
+    struct conditional_cast_functor<true, T>
+    {
+        template <class U>
+        inline auto operator()(U&& u) const
+        {
+            return static_cast<T>(std::forward<U>(u));
+        }
+    };
+
+    /**
+     * @brief Perform a type cast when a condition is true.
+     * If <tt>condition</tt> is true, return <tt>static_cast<T>(u)</tt>,
+     * otherwise return <tt>u</tt> unchanged. This is useful when an unconditional
+     * static_cast would force undesired type conversions in some situations where
+     * an error or warning would be desired. The condition determines when the
+     * explicit cast is ok.
+     */
+    template <bool condition, class T, class U>
+    inline auto conditional_cast(U&& u)
+    {
+        return conditional_cast_functor<condition, T>()(std::forward<U>(u));
+    }
+
+    /**********************
+     * tracking allocator *
+     **********************/
+
+    namespace alloc_tracking
+    {
+        inline bool& enabled()
+        {
+            static bool enabled;
+            return enabled;
+        }
+
+        inline void enable()
+        {
+            enabled() = true;
+        }
+
+        inline void disable()
+        {
+            enabled() = false;
+        }
+
+        enum policy
+        {
+            print,
+            assert
+        };
+    }
+
+    template <class T, class A, alloc_tracking::policy P>
+    struct tracking_allocator : private A
+    {
+        using base_type = A;
+        using value_type = typename A::value_type;
+        using reference = typename A::reference;
+        using const_reference = typename A::const_reference;
+        using pointer = typename A::pointer;
+        using const_pointer = typename A::const_pointer;
+        using size_type = typename A::size_type;
+        using difference_type = typename A::difference_type;
+
+        tracking_allocator() = default;
+
+        T* allocate(std::size_t n)
+        {
+            if (alloc_tracking::enabled())
+            {
+                if (P == alloc_tracking::print)
+                {
+                    std::cout << "xtensor allocating: " << n << "" << std::endl;
+                }
+                else if (P == alloc_tracking::assert)
+                {
+                    XTENSOR_THROW(
+                        std::runtime_error,
+                        "xtensor allocation of " + std::to_string(n) + " elements detected"
+                    );
+                }
+            }
+            return base_type::allocate(n);
+        }
+
+        using base_type::construct;
+        using base_type::deallocate;
+        using base_type::destroy;
+
+        template <class U>
+        struct rebind
+        {
+            using traits = std::allocator_traits<A>;
+            using other = tracking_allocator<U, typename traits::template rebind_alloc<U>, P>;
+        };
+    };
+
+    template <class T, class AT, alloc_tracking::policy PT, class U, class AU, alloc_tracking::policy PU>
+    inline bool operator==(const tracking_allocator<T, AT, PT>&, const tracking_allocator<U, AU, PU>&)
+    {
+        return std::is_same<AT, AU>::value;
+    }
+
+    template <class T, class AT, alloc_tracking::policy PT, class U, class AU, alloc_tracking::policy PU>
+    inline bool operator!=(const tracking_allocator<T, AT, PT>& a, const tracking_allocator<U, AU, PU>& b)
+    {
+        return !(a == b);
+    }
+
+    /*****************
+     * has_assign_to *
+     *****************/
+
+    template <class E1, class E2, class = void>
+    struct has_assign_to : std::false_type
+    {
+    };
+
+    template <class E1, class E2>
+    struct has_assign_to<E1, E2, void_t<decltype(std::declval<const E2&>().assign_to(std::declval<E1&>()))>>
+        : std::true_type
+    {
+    };
+
+    /*************************************
+     * overlapping_memory_checker_traits *
+     *************************************/
+
+    template <class T, class Enable = void>
+    struct has_memory_address : std::false_type
+    {
+    };
+
+    template <class T>
+    struct has_memory_address<T, void_t<decltype(std::addressof(*std::declval<T>().begin()))>> : std::true_type
+    {
+    };
+
+    struct memory_range
+    {
+        // Checking pointer overlap is more correct in integer values,
+        // for more explanation check https://devblogs.microsoft.com/oldnewthing/20170927-00/?p=97095
+        const uintptr_t m_first = 0;
+        const uintptr_t m_last = 0;
+
+        explicit memory_range() = default;
+
+        template <class T>
+        explicit memory_range(T* first, T* last)
+            : m_first(reinterpret_cast<uintptr_t>(last < first ? last : first))
+            , m_last(reinterpret_cast<uintptr_t>(last < first ? first : last))
+        {
+        }
+
+        template <class T>
+        bool overlaps(T* first, T* last) const
+        {
+            if (first <= last)
+            {
+                return reinterpret_cast<uintptr_t>(first) <= m_last
+                       && reinterpret_cast<uintptr_t>(last) >= m_first;
+            }
+            else
+            {
+                return reinterpret_cast<uintptr_t>(last) <= m_last
+                       && reinterpret_cast<uintptr_t>(first) >= m_first;
+            }
+        }
+    };
+
+    template <class E, class Enable = void>
+    struct overlapping_memory_checker_traits
+    {
+        static bool check_overlap(const E&, const memory_range&)
+        {
+            return true;
+        }
+    };
+
+    template <class E>
+    struct overlapping_memory_checker_traits<E, std::enable_if_t<has_memory_address<E>::value>>
+    {
+        static bool check_overlap(const E& expr, const memory_range& dst_range)
+        {
+            if (expr.size() == 0)
+            {
+                return false;
+            }
+            else
+            {
+                return dst_range.overlaps(std::addressof(*expr.begin()), std::addressof(*expr.rbegin()));
+            }
+        }
+    };
+
+    struct overlapping_memory_checker_base
+    {
+        memory_range m_dst_range;
+
+        explicit overlapping_memory_checker_base() = default;
+
+        explicit overlapping_memory_checker_base(memory_range dst_memory_range)
+            : m_dst_range(std::move(dst_memory_range))
+        {
+        }
+
+        template <class E>
+        bool check_overlap(const E& expr) const
+        {
+            if (!m_dst_range.m_first || !m_dst_range.m_last)
+            {
+                return false;
+            }
+            else
+            {
+                return overlapping_memory_checker_traits<E>::check_overlap(expr, m_dst_range);
+            }
+        }
+    };
+
+    template <class Dst, class Enable = void>
+    struct overlapping_memory_checker : overlapping_memory_checker_base
+    {
+        explicit overlapping_memory_checker(const Dst&)
+            : overlapping_memory_checker_base()
+        {
+        }
+    };
+
+    template <class Dst>
+    struct overlapping_memory_checker<Dst, std::enable_if_t<has_memory_address<Dst>::value>>
+        : overlapping_memory_checker_base
+    {
+        explicit overlapping_memory_checker(const Dst& aDst)
+            : overlapping_memory_checker_base(
+                [&]()
+                {
+                    if (aDst.size() == 0)
+                    {
+                        return memory_range();
+                    }
+                    else
+                    {
+                        return memory_range(std::addressof(*aDst.begin()), std::addressof(*aDst.rbegin()));
+                    }
+                }()
+            )
+        {
+        }
+    };
+
+    template <class Dst>
+    auto make_overlapping_memory_checker(const Dst& a_dst)
+    {
+        return overlapping_memory_checker<Dst>(a_dst);
+    }
+
+    /********************
+     * rebind_container *
+     ********************/
+
+    template <class X, template <class, class> class C, class T, class A>
+    struct rebind_container<X, C<T, A>>
+    {
+        using traits = std::allocator_traits<A>;
+        using allocator = typename traits::template rebind_alloc<X>;
+        using type = C<X, allocator>;
+    };
+
+#if defined(__GNUC__) && __GNUC__ > 6 && !defined(__clang__) && __cplusplus >= 201703L
+    template <class X, class T, std::size_t N>
+    struct rebind_container<X, std::array<T, N>>
+    {
+        using type = std::array<X, N>;
+    };
+#else
+    template <class X, template <class, std::size_t> class C, class T, std::size_t N>
+    struct rebind_container<X, C<T, N>>
+    {
+        using type = C<X, N>;
+    };
+#endif
+
+    /********************
+     * get_strides_type *
+     ********************/
+
+    template <class S>
+    struct get_strides_type
+    {
+        using type = typename rebind_container<std::ptrdiff_t, S>::type;
+    };
+
+    template <std::size_t... I>
+    struct get_strides_type<fixed_shape<I...>>
+    {
+        // TODO we could compute the strides statically here.
+        //  But we'll need full constexpr support to have a
+        //  homogenous ``compute_strides`` method
+        using type = std::array<std::ptrdiff_t, sizeof...(I)>;
+    };
+
+    template <class CP, class O, class A>
+    class xbuffer_adaptor;
+
+    template <class CP, class O, class A>
+    struct get_strides_type<xbuffer_adaptor<CP, O, A>>
+    {
+        // In bindings this mapping is called by reshape_view with an inner shape of type
+        // xbuffer_adaptor.
+        // Since we cannot create a buffer adaptor holding data, we map it to an std::vector.
+        using type = std::vector<
+            typename xbuffer_adaptor<CP, O, A>::value_type,
+            typename xbuffer_adaptor<CP, O, A>::allocator_type>;
+    };
+
+
+    template <class C>
+    using get_strides_t = typename get_strides_type<C>::type;
+
+    /*******************
+     * inner_reference *
+     *******************/
+
+    template <class ST>
+    struct inner_reference
+    {
+        using storage_type = std::decay_t<ST>;
+        using type = std::conditional_t<
+            std::is_const<std::remove_reference_t<ST>>::value,
+            typename storage_type::const_reference,
+            typename storage_type::reference>;
+    };
+
+    template <class ST>
+    using inner_reference_t = typename inner_reference<ST>::type;
+
+    /************
+     * get_rank *
+     ************/
+
+    template <class E, typename = void>
+    struct get_rank
+    {
+        static constexpr std::size_t value = SIZE_MAX;
+    };
+
+    template <class E>
+    struct get_rank<E, decltype((void) E::rank, void())>
+    {
+        static constexpr std::size_t value = E::rank;
+    };
+
+    /******************
+     * has_fixed_rank *
+     ******************/
+
+    template <class E>
+    struct has_fixed_rank
+    {
+        using type = std::integral_constant<bool, get_rank<std::decay_t<E>>::value != SIZE_MAX>;
+    };
+
+    template <class E>
+    using has_fixed_rank_t = typename has_fixed_rank<std::decay_t<E>>::type;
+
+    /************
+     * has_rank *
+     ************/
+
+    template <class E, size_t N>
+    struct has_rank
+    {
+        using type = std::integral_constant<bool, get_rank<std::decay_t<E>>::value == N>;
+    };
+
+    template <class E, size_t N>
+    using has_rank_t = typename has_rank<std::decay_t<E>, N>::type;
+
+}
+
+#endif

+ 104 - 0
3rd/numpy/include/xtensor/xvectorize.hpp

@@ -0,0 +1,104 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_VECTORIZE_HPP
+#define XTENSOR_VECTORIZE_HPP
+
+#include <type_traits>
+#include <utility>
+
+#include "xfunction.hpp"
+#include "xutils.hpp"
+
+namespace xt
+{
+
+    /***************
+     * xvectorizer *
+     ***************/
+
+    template <class F, class R>
+    class xvectorizer
+    {
+    public:
+
+        template <class... E>
+        using xfunction_type = xfunction<F, xclosure_t<E>...>;
+
+        template <class Func, class = std::enable_if_t<!std::is_same<std::decay_t<Func>, xvectorizer>::value>>
+        xvectorizer(Func&& f);
+
+        template <class... E>
+        xfunction_type<E...> operator()(E&&... e) const;
+
+    private:
+
+        typename std::remove_reference<F>::type m_f;
+    };
+
+    namespace detail
+    {
+        template <class F>
+        using get_function_type = remove_class_t<decltype(&std::remove_reference_t<F>::operator())>;
+    }
+
+    template <class R, class... Args>
+    xvectorizer<R (*)(Args...), R> vectorize(R (*f)(Args...));
+
+    template <class F, class R, class... Args>
+    xvectorizer<F, R> vectorize(F&& f, R (*)(Args...));
+
+// Workaround for Visual Studio 15.7.1.
+// Error C2668 (ambiguous call to overloaded function) mistaking a declarations
+// for the definition of another overload.
+#ifndef _MSC_VER
+    template <class F>
+    auto vectorize(F&& f)
+        -> decltype(vectorize(std::forward<F>(f), std::declval<detail::get_function_type<F>*>()));
+#endif
+
+    /******************************
+     * xvectorizer implementation *
+     ******************************/
+
+    template <class F, class R>
+    template <class Func, class>
+    inline xvectorizer<F, R>::xvectorizer(Func&& f)
+        : m_f(std::forward<Func>(f))
+    {
+    }
+
+    template <class F, class R>
+    template <class... E>
+    inline auto xvectorizer<F, R>::operator()(E&&... e) const -> xfunction_type<E...>
+    {
+        return xfunction_type<E...>(m_f, std::forward<E>(e)...);
+    }
+
+    template <class R, class... Args>
+    inline xvectorizer<R (*)(Args...), R> vectorize(R (*f)(Args...))
+    {
+        return xvectorizer<R (*)(Args...), R>(f);
+    }
+
+    template <class F, class R, class... Args>
+    inline xvectorizer<F, R> vectorize(F&& f, R (*)(Args...))
+    {
+        return xvectorizer<F, R>(std::forward<F>(f));
+    }
+
+    template <class F>
+    inline auto vectorize(F&& f)
+        -> decltype(vectorize(std::forward<F>(f), std::declval<detail::get_function_type<F>*>()))
+    {
+        return vectorize(std::forward<F>(f), static_cast<detail::get_function_type<F>*>(nullptr));
+    }
+}
+
+#endif

+ 2317 - 0
3rd/numpy/include/xtensor/xview.hpp

@@ -0,0 +1,2317 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_VIEW_HPP
+#define XTENSOR_VIEW_HPP
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include <xtl/xclosure.hpp>
+#include <xtl/xmeta_utils.hpp>
+#include <xtl/xsequence.hpp>
+#include <xtl/xtype_traits.hpp>
+
+#include "xaccessible.hpp"
+#include "xarray.hpp"
+#include "xbroadcast.hpp"
+#include "xcontainer.hpp"
+#include "xiterable.hpp"
+#include "xsemantic.hpp"
+#include "xslice.hpp"
+#include "xtensor.hpp"
+#include "xtensor_config.hpp"
+#include "xtensor_forward.hpp"
+#include "xview_utils.hpp"
+
+namespace xt
+{
+
+    /*******************
+     * xview extension *
+     *******************/
+
+    namespace extension
+    {
+        template <class Tag, class CT, class... S>
+        struct xview_base_impl;
+
+        template <class CT, class... S>
+        struct xview_base_impl<xtensor_expression_tag, CT, S...>
+        {
+            using type = xtensor_empty_base;
+        };
+
+        template <class CT, class... S>
+        struct xview_base : xview_base_impl<xexpression_tag_t<CT>, CT, S...>
+        {
+        };
+
+        template <class CT, class... S>
+        using xview_base_t = typename xview_base<CT, S...>::type;
+    }
+
+    /*********************
+     * xview declaration *
+     *********************/
+
+    template <bool is_const, class CT, class... S>
+    class xview_stepper;
+
+    template <class ST, class... S>
+    struct xview_shape_type;
+
+    namespace detail
+    {
+
+        template <class T>
+        struct is_xrange : std::false_type
+        {
+        };
+
+        template <class T>
+        struct is_xrange<xrange<T>> : std::true_type
+        {
+        };
+
+        template <class S>
+        struct is_xall_slice : std::false_type
+        {
+        };
+
+        template <class T>
+        struct is_xall_slice<xall<T>> : std::true_type
+        {
+        };
+
+        template <layout_type L, bool valid, bool all_seen, bool range_seen, class V>
+        struct is_contiguous_view_impl
+        {
+            static constexpr bool value = false;
+        };
+
+        template <class T>
+        struct static_dimension
+        {
+            static constexpr std::ptrdiff_t value = -1;
+        };
+
+        template <class T, std::size_t N>
+        struct static_dimension<std::array<T, N>>
+        {
+            static constexpr std::ptrdiff_t value = static_cast<std::ptrdiff_t>(N);
+        };
+
+        template <class T, std::size_t N>
+        struct static_dimension<xt::const_array<T, N>>
+        {
+            static constexpr std::ptrdiff_t value = static_cast<std::ptrdiff_t>(N);
+        };
+
+        template <std::size_t... I>
+        struct static_dimension<xt::fixed_shape<I...>>
+        {
+            static constexpr std::ptrdiff_t value = sizeof...(I);
+        };
+
+        // if we have the same number of integers as we have static dimensions
+        // this can be interpreted like a xscalar
+        template <class CT, class... S>
+        struct is_xscalar_impl<xview<CT, S...>>
+        {
+            static constexpr bool value = static_cast<std::ptrdiff_t>(integral_count<S...>()
+                                          ) == static_dimension<typename std::decay_t<CT>::shape_type>::value
+                                              ? true
+                                              : false;
+        };
+
+        template <class S>
+        struct is_strided_slice_impl : std::true_type
+        {
+        };
+
+        template <class T>
+        struct is_strided_slice_impl<xkeep_slice<T>> : std::false_type
+        {
+        };
+
+        template <class T>
+        struct is_strided_slice_impl<xdrop_slice<T>> : std::false_type
+        {
+        };
+
+        // If we have no discontiguous slices, we can calculate strides for this view.
+        template <class E, class... S>
+        struct is_strided_view
+            : std::integral_constant<
+                  bool,
+                  xtl::conjunction<has_data_interface<E>, is_strided_slice_impl<std::decay_t<S>>...>::value>
+        {
+        };
+
+        // if row major the view can only be (statically) computed as contiguous if:
+        // any number of integers is followed by either one or no range which
+        // are followed by explicit (or implicit) all's
+        //
+        // e.g.
+        //      (i, j, all(), all()) == contiguous
+        //      (i, range(0, 2), all()) == contiguous
+        //      (i) == contiguous (implicit all slices)
+        //      (i, all(), j) == *not* contiguous
+        //      (i, range(0, 2), range(0, 2)) == *not* contiguous etc.
+        template <bool valid, bool all_seen, bool range_seen, class V>
+        struct is_contiguous_view_impl<layout_type::row_major, valid, all_seen, range_seen, V>
+        {
+            using slice = xtl::mpl::front_t<V>;
+            static constexpr bool is_range_slice = is_xrange<slice>::value;
+            static constexpr bool is_int_slice = xtl::is_integral<slice>::value;
+            static constexpr bool is_all_slice = is_xall_slice<slice>::value;
+            static constexpr bool have_all_seen = all_seen || is_all_slice;
+            static constexpr bool have_range_seen = is_range_slice;
+
+            static constexpr bool is_valid = valid
+                                             && (have_all_seen
+                                                     ? is_all_slice
+                                                     : (!range_seen && (is_int_slice || is_range_slice)));
+
+            static constexpr bool value = is_contiguous_view_impl < layout_type::row_major, is_valid,
+                                  have_all_seen, range_seen || is_range_slice,
+                                  xtl::mpl::pop_front_t < V >> ::value;
+        };
+
+        template <bool valid, bool all_seen, bool range_seen>
+        struct is_contiguous_view_impl<layout_type::row_major, valid, all_seen, range_seen, xtl::mpl::vector<>>
+        {
+            static constexpr bool value = valid;
+        };
+
+        // For column major the *same* but reverse is true -- with the additional
+        // constraint that we have to know the dimension at compile time otherwise
+        // we cannot make the decision as there might be implicit all's following.
+        template <bool valid, bool int_seen, bool range_seen, class V>
+        struct is_contiguous_view_impl<layout_type::column_major, valid, int_seen, range_seen, V>
+        {
+            using slice = xtl::mpl::front_t<V>;
+            static constexpr bool is_range_slice = is_xrange<slice>::value;
+            static constexpr bool is_int_slice = xtl::is_integral<slice>::value;
+            static constexpr bool is_all_slice = is_xall_slice<slice>::value;
+
+            static constexpr bool have_int_seen = int_seen || is_int_slice;
+
+            static constexpr bool is_valid = valid
+                                             && (have_int_seen
+                                                     ? is_int_slice
+                                                     : (!range_seen && (is_all_slice || is_range_slice)));
+            static constexpr bool value = is_contiguous_view_impl < layout_type::column_major, is_valid,
+                                  have_int_seen, is_range_slice || range_seen,
+                                  xtl::mpl::pop_front_t < V >> ::value;
+        };
+
+        template <bool valid, bool int_seen, bool range_seen>
+        struct is_contiguous_view_impl<layout_type::column_major, valid, int_seen, range_seen, xtl::mpl::vector<>>
+        {
+            static constexpr bool value = valid;
+        };
+
+        // TODO relax has_data_interface constraint here!
+        template <class E, class... S>
+        struct is_contiguous_view
+            : std::integral_constant<
+                  bool,
+                  has_data_interface<E>::value
+                      && !(
+                          E::static_layout == layout_type::column_major
+                          && static_cast<std::size_t>(static_dimension<typename E::shape_type>::value) != sizeof...(S)
+                      )
+                      && is_contiguous_view_impl<E::static_layout, true, false, false, xtl::mpl::vector<S...>>::value>
+        {
+        };
+
+        // Maps an inner shape/strides container T to the sequence_view type
+        // obtained by statically dropping `offset` entries: from the front for
+        // row-major, from the back for column-major. The primary template is
+        // the fallback for layouts where no static offsetting applies.
+        template <layout_type L, class T, std::ptrdiff_t offset>
+        struct unwrap_offset_container
+        {
+            using type = void;
+        };
+
+        // Row-major: view the container starting `offset` entries in.
+        template <class T, std::ptrdiff_t offset>
+        struct unwrap_offset_container<layout_type::row_major, T, offset>
+        {
+            using type = sequence_view<T, offset, static_dimension<T>::value>;
+        };
+
+        // Row-major over an existing sequence_view: shift its start forward
+        // instead of nesting views.
+        template <class T, std::ptrdiff_t start, std::ptrdiff_t end, std::ptrdiff_t offset>
+        struct unwrap_offset_container<layout_type::row_major, sequence_view<T, start, end>, offset>
+        {
+            using type = sequence_view<T, start + offset, end>;
+        };
+
+        // Column-major: shorten the container by `offset` entries at the end.
+        template <class T, std::ptrdiff_t offset>
+        struct unwrap_offset_container<layout_type::column_major, T, offset>
+        {
+            using type = sequence_view<T, 0, static_dimension<T>::value - offset>;
+        };
+
+        // Column-major over an existing sequence_view: pull its end backward.
+        template <class T, std::ptrdiff_t start, std::ptrdiff_t end, std::ptrdiff_t offset>
+        struct unwrap_offset_container<layout_type::column_major, sequence_view<T, start, end>, offset>
+        {
+            using type = sequence_view<T, start, end - offset>;
+        };
+
+        // Selects the inner shape type for a contiguous view.
+        // NOTE(review): "contigous" is a historical typo kept for source
+        // compatibility (the name is referenced below).
+        template <class E, class... S>
+        struct get_contigous_shape_type
+        {
+            // if we have no `range` in the slices we can re-use the shape with an offset
+            using type = std::conditional_t<
+                xtl::disjunction<is_xrange<S>...>::value,
+                typename xview_shape_type<typename E::shape_type, S...>::type,
+                // In the false branch we know that we have only integers at the front OR end, and NO range
+                typename unwrap_offset_container<E::static_layout, typename E::inner_shape_type, integral_count<S...>()>::type>;
+        };
+
+        // Trait: true iff T is an instantiation of sequence_view.
+        // (std::false_type / std::true_type are exactly
+        // std::integral_constant<bool, false / true>.)
+        template <class T>
+        struct is_sequence_view : std::false_type
+        {
+        };
+
+        template <class T, std::ptrdiff_t S, std::ptrdiff_t E>
+        struct is_sequence_view<sequence_view<T, S, E>> : std::true_type
+        {
+        };
+    }
+
+    // Inner-type bundle for xview: picks the static layout (preserved only
+    // when the sliced view is still contiguous, dynamic otherwise) and the
+    // storage type (const-qualified when the closure CT is const).
+    template <class CT, class... S>
+    struct xcontainer_inner_types<xview<CT, S...>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+        using reference = inner_reference_t<CT>;
+        using const_reference = typename xexpression_type::const_reference;
+        using size_type = typename xexpression_type::size_type;
+        using temporary_type = view_temporary_type_t<xexpression_type, S...>;
+
+        static constexpr layout_type layout = detail::is_contiguous_view<xexpression_type, S...>::value
+                                                  ? xexpression_type::static_layout
+                                                  : layout_type::dynamic;
+
+        static constexpr bool is_const = std::is_const<std::remove_reference_t<CT>>::value;
+
+        // make_invalid_type<> is used when the underlying expression has no
+        // data interface, so storage() SFINAEs away cleanly.
+        using extract_storage_type = xtl::mpl::eval_if_t<
+            has_data_interface<xexpression_type>,
+            detail::expr_storage_type<xexpression_type>,
+            make_invalid_type<>>;
+        using storage_type = std::conditional_t<is_const, const extract_storage_type, extract_storage_type>;
+    };
+
+    // Iteration-related types for xview. Strided views reuse the generic
+    // xstepper; non-strided views need the dedicated xview_stepper which
+    // translates indices through the slices.
+    template <class CT, class... S>
+    struct xiterable_inner_types<xview<CT, S...>>
+    {
+        using xexpression_type = std::decay_t<CT>;
+
+        static constexpr bool is_strided_view = detail::is_strided_view<xexpression_type, S...>::value;
+        static constexpr bool is_contiguous_view = detail::is_contiguous_view<xexpression_type, S...>::value;
+
+        using inner_shape_type = std::conditional_t<
+            is_contiguous_view,
+            typename detail::get_contigous_shape_type<xexpression_type, S...>::type,
+            typename xview_shape_type<typename xexpression_type::shape_type, S...>::type>;
+
+        using stepper = std::conditional_t<
+            is_strided_view,
+            xstepper<xview<CT, S...>>,
+            xview_stepper<std::is_const<std::remove_reference_t<CT>>::value, CT, S...>>;
+
+        using const_stepper = std::conditional_t<
+            is_strided_view,
+            xstepper<const xview<CT, S...>>,
+            xview_stepper<true, std::remove_cv_t<CT>, S...>>;
+    };
+
+    /**
+     * @class xview
+     * @brief Multidimensional view with tensor semantic.
+     *
+     * The xview class implements a multidimensional view with tensor
+     * semantic. It is used to adapt the shape of an xexpression without
+     * changing it. xview is not meant to be used directly, but
+     * only with the \ref view helper functions.
+     *
+     * @tparam CT the closure type of the \ref xexpression to adapt
+     * @tparam S the slices type describing the shape adaptation
+     *
+     * @sa view, range, all, newaxis, keep, drop
+     */
+    template <class CT, class... S>
+    class xview : public xview_semantic<xview<CT, S...>>,
+                  public std::conditional_t<
+                      detail::is_contiguous_view<std::decay_t<CT>, S...>::value,
+                      xcontiguous_iterable<xview<CT, S...>>,
+                      xiterable<xview<CT, S...>>>,
+                  public xaccessible<xview<CT, S...>>,
+                  public extension::xview_base_t<CT, S...>
+    {
+    public:
+
+        using self_type = xview<CT, S...>;
+        using inner_types = xcontainer_inner_types<self_type>;
+        using xexpression_type = std::decay_t<CT>;
+        using semantic_base = xview_semantic<self_type>;
+        using temporary_type = typename xcontainer_inner_types<self_type>::temporary_type;
+
+        using accessible_base = xaccessible<self_type>;
+        using extension_base = extension::xview_base_t<CT, S...>;
+        using expression_tag = typename extension_base::expression_tag;
+
+        static constexpr bool is_const = std::is_const<std::remove_reference_t<CT>>::value;
+        using value_type = typename xexpression_type::value_type;
+        using simd_value_type = xt_simd::simd_type<value_type>;
+        using bool_load_type = typename xexpression_type::bool_load_type;
+        using reference = typename inner_types::reference;
+        using const_reference = typename inner_types::const_reference;
+        using pointer = std::
+            conditional_t<is_const, typename xexpression_type::const_pointer, typename xexpression_type::pointer>;
+        using const_pointer = typename xexpression_type::const_pointer;
+        using size_type = typename inner_types::size_type;
+        using difference_type = typename xexpression_type::difference_type;
+
+        static constexpr layout_type static_layout = inner_types::layout;
+        static constexpr bool contiguous_layout = static_layout != layout_type::dynamic;
+
+        static constexpr bool is_strided_view = detail::is_strided_view<xexpression_type, S...>::value;
+        static constexpr bool is_contiguous_view = contiguous_layout;
+
+        using iterable_base = xiterable<self_type>;
+        using inner_shape_type = typename iterable_base::inner_shape_type;
+        using shape_type = typename xview_shape_type<typename xexpression_type::shape_type, S...>::type;
+
+        // Fall back to a generic strides type when the underlying expression
+        // does not expose strides itself.
+        using xexpression_inner_strides_type = xtl::mpl::eval_if_t<
+            has_strides<xexpression_type>,
+            detail::expr_inner_strides_type<xexpression_type>,
+            get_strides_type<shape_type>>;
+
+        using xexpression_inner_backstrides_type = xtl::mpl::eval_if_t<
+            has_strides<xexpression_type>,
+            detail::expr_inner_backstrides_type<xexpression_type>,
+            get_strides_type<shape_type>>;
+
+        using storage_type = typename inner_types::storage_type;
+
+        // "Trivial strides": the view is contiguous and carries no range
+        // slice, so the underlying strides can be reused through a static
+        // sequence_view offset instead of being recomputed.
+        static constexpr bool has_trivial_strides = is_contiguous_view
+                                                    && !xtl::disjunction<detail::is_xrange<S>...>::value;
+        using inner_strides_type = std::conditional_t<
+            has_trivial_strides,
+            typename detail::unwrap_offset_container<
+                xexpression_type::static_layout,
+                xexpression_inner_strides_type,
+                integral_count<S...>()>::type,
+            get_strides_t<shape_type>>;
+
+        using inner_backstrides_type = std::conditional_t<
+            has_trivial_strides,
+            typename detail::unwrap_offset_container<
+                xexpression_type::static_layout,
+                xexpression_inner_backstrides_type,
+                integral_count<S...>()>::type,
+            get_strides_t<shape_type>>;
+
+        using strides_type = get_strides_t<shape_type>;
+        using backstrides_type = strides_type;
+
+
+        using slice_type = std::tuple<S...>;
+
+        using stepper = typename iterable_base::stepper;
+        using const_stepper = typename iterable_base::const_stepper;
+
+        // Linear iterators delegate to the underlying container when the view
+        // is strided over an expression with a data interface.
+        using linear_iterator = std::conditional_t<
+            has_data_interface<xexpression_type>::value && is_strided_view,
+            std::conditional_t<is_const, typename xexpression_type::const_linear_iterator, typename xexpression_type::linear_iterator>,
+            typename iterable_base::linear_iterator>;
+        using const_linear_iterator = std::conditional_t<
+            has_data_interface<xexpression_type>::value && is_strided_view,
+            typename xexpression_type::const_linear_iterator,
+            typename iterable_base::const_linear_iterator>;
+
+        using reverse_linear_iterator = std::reverse_iterator<linear_iterator>;
+        using const_reverse_linear_iterator = std::reverse_iterator<const_linear_iterator>;
+
+        using container_iterator = pointer;
+        using const_container_iterator = const_pointer;
+        // SIZE_MAX marks a statically-unknown rank.
+        static constexpr std::size_t rank = SIZE_MAX;
+
+        // The FSL argument prevents the compiler from calling this constructor
+        // instead of the copy constructor when sizeof...(SL) == 0.
+        template <class CTA, class FSL, class... SL>
+        explicit xview(CTA&& e, FSL&& first_slice, SL&&... slices) noexcept;
+
+        xview(const xview&) = default;
+        self_type& operator=(const xview& rhs);
+
+        template <class E>
+        self_type& operator=(const xexpression<E>& e);
+
+        template <class E>
+        disable_xexpression<E, self_type>& operator=(const E& e);
+
+        const inner_shape_type& shape() const noexcept;
+        const slice_type& slices() const noexcept;
+        layout_type layout() const noexcept;
+        bool is_contiguous() const noexcept;
+        using accessible_base::shape;
+
+        template <class T>
+        void fill(const T& value);
+
+        template <class... Args>
+        reference operator()(Args... args);
+        template <class... Args>
+        reference unchecked(Args... args);
+        template <class It>
+        reference element(It first, It last);
+
+        template <class... Args>
+        const_reference operator()(Args... args) const;
+        template <class... Args>
+        const_reference unchecked(Args... args) const;
+        template <class It>
+        const_reference element(It first, It last) const;
+
+        xexpression_type& expression() noexcept;
+        const xexpression_type& expression() const noexcept;
+
+        template <class ST>
+        bool broadcast_shape(ST& shape, bool reuse_cache = false) const;
+
+        template <class ST>
+        bool has_linear_assign(const ST& strides) const;
+
+        // Stepper accessors: the non-strided overloads (Enable == false) use
+        // xview_stepper, the strided ones use the generic xstepper.
+        template <class ST, bool Enable = is_strided_view>
+        std::enable_if_t<!Enable, stepper> stepper_begin(const ST& shape);
+        template <class ST, bool Enable = is_strided_view>
+        std::enable_if_t<!Enable, stepper> stepper_end(const ST& shape, layout_type l);
+
+        template <class ST, bool Enable = is_strided_view>
+        std::enable_if_t<!Enable, const_stepper> stepper_begin(const ST& shape) const;
+        template <class ST, bool Enable = is_strided_view>
+        std::enable_if_t<!Enable, const_stepper> stepper_end(const ST& shape, layout_type l) const;
+
+        template <class ST, bool Enable = is_strided_view>
+        std::enable_if_t<Enable, stepper> stepper_begin(const ST& shape);
+        template <class ST, bool Enable = is_strided_view>
+        std::enable_if_t<Enable, stepper> stepper_end(const ST& shape, layout_type l);
+
+        template <class ST, bool Enable = is_strided_view>
+        std::enable_if_t<Enable, const_stepper> stepper_begin(const ST& shape) const;
+        template <class ST, bool Enable = is_strided_view>
+        std::enable_if_t<Enable, const_stepper> stepper_end(const ST& shape, layout_type l) const;
+
+        // Direct storage / linear iteration: only available when the
+        // underlying expression has a data interface (and, for the linear
+        // iterators, when the view is strided).
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value, storage_type&> storage();
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value, const storage_type&> storage() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, linear_iterator> linear_begin();
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, linear_iterator> linear_end();
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_linear_iterator>
+        linear_begin() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_linear_iterator>
+        linear_end() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_linear_iterator>
+        linear_cbegin() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_linear_iterator>
+        linear_cend() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, reverse_linear_iterator>
+        linear_rbegin();
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, reverse_linear_iterator>
+        linear_rend();
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_reverse_linear_iterator>
+        linear_rbegin() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_reverse_linear_iterator>
+        linear_rend() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_reverse_linear_iterator>
+        linear_crbegin() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_reverse_linear_iterator>
+        linear_crend() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const inner_strides_type&>
+        strides() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const inner_strides_type&>
+        backstrides() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_pointer> data() const;
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, pointer> data();
+
+        template <class T = xexpression_type>
+        std::enable_if_t<has_data_interface<T>::value && is_strided_view, std::size_t>
+        data_offset() const noexcept;
+
+        template <class It>
+        inline It data_xbegin_impl(It begin) const noexcept;
+
+        template <class It>
+        inline It data_xend_impl(It begin, layout_type l, size_type offset) const noexcept;
+        inline container_iterator data_xbegin() noexcept;
+        inline const_container_iterator data_xbegin() const noexcept;
+        inline container_iterator data_xend(layout_type l, size_type offset) noexcept;
+
+        inline const_container_iterator data_xend(layout_type l, size_type offset) const noexcept;
+
+        // Conversion operator enabled for statically "scalar" views
+        template <class ST = self_type, class = std::enable_if_t<is_xscalar<std::decay_t<ST>>::value, int>>
+        operator reference()
+        {
+            return (*this)();
+        }
+
+        template <class ST = self_type, class = std::enable_if_t<is_xscalar<std::decay_t<ST>>::value, int>>
+        operator const_reference() const
+        {
+            return (*this)();
+        }
+
+        size_type underlying_size(size_type dim) const;
+
+        xtl::xclosure_pointer<self_type&> operator&() &;
+        xtl::xclosure_pointer<const self_type&> operator&() const&;
+        xtl::xclosure_pointer<self_type> operator&() &&;
+
+        template <
+            class E,
+            class T = xexpression_type,
+            class = std::enable_if_t<has_data_interface<T>::value && is_contiguous_view, int>>
+        void assign_to(xexpression<E>& e, bool force_resize) const;
+
+        template <class E>
+        using rebind_t = xview<E, S...>;
+
+        template <class E>
+        rebind_t<E> build_view(E&& e) const;
+
+        //
+        // SIMD interface
+        //
+
+        template <class requested_type>
+        using simd_return_type = xt_simd::simd_return_type<value_type, requested_type>;
+
+        template <class T, class R>
+        using enable_simd_interface = std::enable_if_t<has_simd_interface<T>::value && is_strided_view, R>;
+
+        template <class align, class simd, class T = xexpression_type>
+        enable_simd_interface<T, void> store_simd(size_type i, const simd& e);
+
+        template <
+            class align,
+            class requested_type = value_type,
+            std::size_t N = xt_simd::simd_traits<requested_type>::size,
+            class T = xexpression_type>
+        enable_simd_interface<T, simd_return_type<requested_type>> load_simd(size_type i) const;
+
+        template <class T = xexpression_type>
+        enable_simd_interface<T, reference> data_element(size_type i);
+
+        template <class T = xexpression_type>
+        enable_simd_interface<T, const_reference> data_element(size_type i) const;
+
+        template <class T = xexpression_type>
+        enable_simd_interface<T, reference> flat(size_type i);
+
+        template <class T = xexpression_type>
+        enable_simd_interface<T, const_reference> flat(size_type i) const;
+
+    private:
+
+        // VS 2015 workaround (yes, really)
+        template <std::size_t I>
+        struct lesser_condition
+        {
+            static constexpr bool value = (I + newaxis_count_before<S...>(I + 1) < sizeof...(S));
+        };
+
+        CT m_e;
+        slice_type m_slices;
+        inner_shape_type m_shape;
+        // Strides/backstrides/offset are mutable because they are computed
+        // lazily (on demand) for non-trivial-strides views; m_strides_computed
+        // tracks whether that computation has happened yet.
+        mutable inner_strides_type m_strides;
+        mutable inner_backstrides_type m_backstrides;
+        mutable std::size_t m_data_offset;
+        mutable bool m_strides_computed;
+
+        template <class CTA, class FSL, class... SL>
+        explicit xview(std::true_type, CTA&& e, FSL&& first_slice, SL&&... slices) noexcept;
+
+        template <class CTA, class FSL, class... SL>
+        explicit xview(std::false_type, CTA&& e, FSL&& first_slice, SL&&... slices) noexcept;
+
+        template <class... Args>
+        auto make_index_sequence(Args... args) const noexcept;
+
+        void compute_strides(std::true_type) const;
+        void compute_strides(std::false_type) const;
+
+        reference access();
+
+        template <class Arg, class... Args>
+        reference access(Arg arg, Args... args);
+
+        const_reference access() const;
+
+        template <class Arg, class... Args>
+        const_reference access(Arg arg, Args... args) const;
+
+        template <typename std::decay_t<CT>::size_type... I, class... Args>
+        reference unchecked_impl(std::index_sequence<I...>, Args... args);
+
+        template <typename std::decay_t<CT>::size_type... I, class... Args>
+        const_reference unchecked_impl(std::index_sequence<I...>, Args... args) const;
+
+        template <typename std::decay_t<CT>::size_type... I, class... Args>
+        reference access_impl(std::index_sequence<I...>, Args... args);
+
+        template <typename std::decay_t<CT>::size_type... I, class... Args>
+        const_reference access_impl(std::index_sequence<I...>, Args... args) const;
+
+        template <typename std::decay_t<CT>::size_type I, class... Args>
+        std::enable_if_t<lesser_condition<I>::value, size_type> index(Args... args) const;
+
+        template <typename std::decay_t<CT>::size_type I, class... Args>
+        std::enable_if_t<!lesser_condition<I>::value, size_type> index(Args... args) const;
+
+        template <typename std::decay_t<CT>::size_type, class T>
+        size_type sliced_access(const xslice<T>& slice) const;
+
+        template <typename std::decay_t<CT>::size_type I, class T, class Arg, class... Args>
+        size_type sliced_access(const xslice<T>& slice, Arg arg, Args... args) const;
+
+        template <typename std::decay_t<CT>::size_type I, class T, class... Args>
+        disable_xslice<T, size_type> sliced_access(const T& squeeze, Args...) const;
+
+        using base_index_type = xindex_type_t<typename xexpression_type::shape_type>;
+
+        template <class It>
+        base_index_type make_index(It first, It last) const;
+
+        void assign_temporary_impl(temporary_type&& tmp);
+
+        template <std::size_t... I>
+        std::size_t data_offset_impl(std::index_sequence<I...>) const noexcept;
+
+        template <std::size_t... I>
+        auto compute_strides_impl(std::index_sequence<I...>) const noexcept;
+
+        inner_shape_type compute_shape(std::true_type) const;
+        inner_shape_type compute_shape(std::false_type) const;
+
+        template <class E, std::size_t... I>
+        rebind_t<E> build_view_impl(E&& e, std::index_sequence<I...>) const;
+
+        friend class xview_semantic<xview<CT, S...>>;
+    };
+
+    // Factory helpers (declared here, defined later in this file):
+    // view() builds an xview from an expression and slices; row()/col()
+    // take a signed index, presumably allowing negative (from-the-end)
+    // indexing -- confirm against the definitions.
+    template <class E, class... S>
+    auto view(E&& e, S&&... slices);
+
+    template <class E>
+    auto row(E&& e, std::ptrdiff_t index);
+
+    template <class E>
+    auto col(E&& e, std::ptrdiff_t index);
+
+    /*****************************
+     * xview_stepper declaration *
+     *****************************/
+
+    namespace detail
+    {
+        // Picks the underlying expression's stepper type: the const_stepper
+        // when the view type V is const-qualified, the mutable one otherwise.
+        template <class V>
+        struct get_stepper_impl
+        {
+            using xexpression_type = typename V::xexpression_type;
+            using type = typename xexpression_type::stepper;
+        };
+
+        template <class V>
+        struct get_stepper_impl<const V>
+        {
+            using xexpression_type = typename V::xexpression_type;
+            using type = typename xexpression_type::const_stepper;
+        };
+    }
+
+    // Convenience alias over get_stepper_impl.
+    template <class V>
+    using get_stepper = typename detail::get_stepper_impl<V>::type;
+
+    // Stepper over a non-strided xview: wraps a stepper of the underlying
+    // expression and translates per-dimension steps through the view's
+    // slices (skipping newaxis dimensions, which do not exist underneath).
+    template <bool is_const, class CT, class... S>
+    class xview_stepper
+    {
+    public:
+
+        using view_type = std::conditional_t<is_const, const xview<CT, S...>, xview<CT, S...>>;
+        using substepper_type = get_stepper<view_type>;
+
+        using value_type = typename substepper_type::value_type;
+        using reference = typename substepper_type::reference;
+        using pointer = typename substepper_type::pointer;
+        using difference_type = typename substepper_type::difference_type;
+        using size_type = typename view_type::size_type;
+
+        using shape_type = typename substepper_type::shape_type;
+
+        xview_stepper() = default;
+        xview_stepper(
+            view_type* view,
+            substepper_type it,
+            size_type offset,
+            bool end = false,
+            layout_type l = XTENSOR_DEFAULT_TRAVERSAL
+        );
+
+        reference operator*() const;
+
+        void step(size_type dim);
+        void step_back(size_type dim);
+        void step(size_type dim, size_type n);
+        void step_back(size_type dim, size_type n);
+        void reset(size_type dim);
+        void reset_back(size_type dim);
+
+        void to_begin();
+        void to_end(layout_type l);
+
+    private:
+
+        bool is_newaxis_slice(size_type index) const noexcept;
+        void to_end_impl(layout_type l);
+
+        // Shared forward/backward stepping logic, parameterized by the
+        // operation F to apply on the sub-stepper.
+        template <class F>
+        void common_step_forward(size_type dim, F f);
+        template <class F>
+        void common_step_backward(size_type dim, F f);
+
+        template <class F>
+        void common_step_forward(size_type dim, size_type n, F f);
+        template <class F>
+        void common_step_backward(size_type dim, size_type n, F f);
+
+        template <class F>
+        void common_reset(size_type dim, F f, bool backwards);
+
+        view_type* p_view;
+        substepper_type m_it;
+        size_type m_offset;
+        // Current position per slice, tracked to clamp stepping within each
+        // slice's extent.
+        std::array<std::size_t, sizeof...(S)> m_index_keeper;
+    };
+
+    // meta-function returning the shape type for an xview
+    template <class ST, class... S>
+    struct xview_shape_type
+    {
+        using type = ST;
+    };
+
+    // Static shapes: the view's rank is the base rank minus integral slices
+    // (each removes a dimension) plus newaxis slices (each adds one).
+    template <class I, std::size_t L, class... S>
+    struct xview_shape_type<std::array<I, L>, S...>
+    {
+        using type = std::array<I, L - integral_count<S...>() + newaxis_count<S...>()>;
+    };
+
+    // fixed_shape collapses to the equivalent std::array shape type.
+    template <std::size_t... I, class... S>
+    struct xview_shape_type<fixed_shape<I...>, S...>
+    {
+        using type = typename xview_shape_type<std::array<std::size_t, sizeof...(I)>, S...>::type;
+    };
+
+    /************************
+     * xview implementation *
+     ************************/
+
+    /**
+     * @name Constructor
+     */
+
+    //@{
+    /**
+     * Constructs a view on the specified xexpression.
+     * Users should not call directly this constructor but
+     * use the view function instead.
+     * @param e the xexpression to adapt
+     * @param first_slice the first slice describing the view
+     * @param slices the slices list describing the view
+     * @sa view
+     */
+    template <class CT, class... S>
+    template <class CTA, class FSL, class... SL>
+    xview<CT, S...>::xview(CTA&& e, FSL&& first_slice, SL&&... slices) noexcept
+        // Tag-dispatch to the eager (trivial strides) or lazy constructor.
+        : xview(
+            std::integral_constant<bool, has_trivial_strides>{},
+            std::forward<CTA>(e),
+            std::forward<FSL>(first_slice),
+            std::forward<SL>(slices)...
+        )
+    {
+    }
+
+    // trivial strides initializer
+    // The view is contiguous with no range slices, so strides/backstrides are
+    // taken directly from the underlying expression and only the data offset
+    // needs computing; mark strides as already computed.
+    template <class CT, class... S>
+    template <class CTA, class FSL, class... SL>
+    xview<CT, S...>::xview(std::true_type, CTA&& e, FSL&& first_slice, SL&&... slices) noexcept
+        : m_e(std::forward<CTA>(e))
+        , m_slices(std::forward<FSL>(first_slice), std::forward<SL>(slices)...)
+        , m_shape(compute_shape(detail::is_sequence_view<inner_shape_type>{}))
+        , m_strides(m_e.strides())
+        , m_backstrides(m_e.backstrides())
+        , m_data_offset(data_offset_impl(std::make_index_sequence<sizeof...(S)>()))
+        , m_strides_computed(true)
+    {
+    }
+
+    // General initializer: strides cannot be reused, so defer their
+    // computation until first requested (m_strides_computed == false).
+    template <class CT, class... S>
+    template <class CTA, class FSL, class... SL>
+    xview<CT, S...>::xview(std::false_type, CTA&& e, FSL&& first_slice, SL&&... slices) noexcept
+        : m_e(std::forward<CTA>(e))
+        , m_slices(std::forward<FSL>(first_slice), std::forward<SL>(slices)...)
+        , m_shape(compute_shape(std::false_type{}))
+        , m_strides_computed(false)
+    {
+    }
+
+    //@}
+
+    // Copy assignment: materialize the right-hand side into a temporary
+    // first so self-overlapping assignments behave correctly.
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::operator=(const xview& rhs) -> self_type&
+    {
+        temporary_type tmp(rhs);
+        return this->assign_temporary(std::move(tmp));
+    }
+
+    /**
+     * @name Extended copy semantic
+     */
+    //@{
+    /**
+     * The extended assignment operator.
+     */
+    template <class CT, class... S>
+    template <class E>
+    inline auto xview<CT, S...>::operator=(const xexpression<E>& e) -> self_type&
+    {
+        // Delegate to xview_semantic, which assigns the expression into the
+        // viewed region of the underlying container.
+        return semantic_base::operator=(e);
+    }
+
+    //@}
+
+    // Scalar assignment (enabled only for non-xexpression types E):
+    // broadcasts the value over the whole view.
+    template <class CT, class... S>
+    template <class E>
+    inline auto xview<CT, S...>::operator=(const E& e) -> disable_xexpression<E, self_type>&
+    {
+        this->fill(e);
+        return *this;
+    }
+
+    /**
+     * @name Size and shape
+     */
+    //@{
+    /**
+     * Returns the shape of the view.
+     */
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::shape() const noexcept -> const inner_shape_type&
+    {
+        return m_shape;
+    }
+
+    /**
+     * Returns the slices of the view.
+     */
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::slices() const noexcept -> const slice_type&
+    {
+        return m_slices;
+    }
+
+    /**
+     * Returns the layout of the view.
+     * For strided views this is the static layout when known at compile time,
+     * otherwise the underlying expression's layout if the strides match it;
+     * non-strided views always report layout_type::dynamic.
+     */
+    template <class CT, class... S>
+    inline layout_type xview<CT, S...>::layout() const noexcept
+    {
+        return xtl::mpl::static_if<is_strided_view>(
+            [&](auto self)
+            {
+                if (static_layout != layout_type::dynamic)
+                {
+                    return static_layout;
+                }
+                else
+                {
+                    bool strides_match = do_strides_match(
+                        self(this)->shape(),
+                        self(this)->strides(),
+                        self(this)->m_e.layout(),
+                        true
+                    );
+                    return strides_match ? self(this)->m_e.layout() : layout_type::dynamic;
+                }
+            },
+            /* else */
+            [&](auto /*self*/)
+            {
+                return layout_type::dynamic;
+            }
+        );
+    }
+
+    /**
+     * Returns true when the view has a non-dynamic (contiguous) layout.
+     */
+    template <class CT, class... S>
+    inline bool xview<CT, S...>::is_contiguous() const noexcept
+    {
+        const layout_type current = layout();
+        return current != layout_type::dynamic;
+    }
+
+    //@}
+
+    /**
+     * @name Data
+     */
+    //@{
+
+    /**
+     * Fills the view with the given value.
+     * @param value the value to fill the view with.
+     */
+    template <class CT, class... S>
+    template <class T>
+    inline void xview<CT, S...>::fill(const T& value)
+    {
+        // When the layout is statically known, the faster linear iterators
+        // can be used; otherwise fall back to the generic (stepper) iterators.
+        xtl::mpl::static_if<static_layout != layout_type::dynamic>(
+            [&](auto self)
+            {
+                std::fill(self(this)->linear_begin(), self(this)->linear_end(), value);
+            },
+            /*else*/
+            [&](auto self)
+            {
+                std::fill(self(this)->begin(), self(this)->end(), value);
+            }
+        );
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the view.
+     * @param args a list of indices specifying the position in the view. Indices
+     * must be unsigned integers, the number of indices should be equal or greater
+     * than the number of dimensions of the view.
+     */
+    template <class CT, class... S>
+    template <class... Args>
+    inline auto xview<CT, S...>::operator()(Args... args) -> reference
+    {
+        XTENSOR_TRY(check_index(shape(), args...));
+        XTENSOR_CHECK_DIMENSION(shape(), args...);
+        // The static cast prevents the compiler from instantiating the template methods with signed integers,
+        // leading to warning about signed/unsigned conversions in the deeper layers of the access methods
+        return access(static_cast<size_type>(args)...);
+    }
+
+    /**
+     * Returns a reference to the element at the specified position in the view.
+     * @param args a list of indices specifying the position in the view. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the view, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class CT, class... S>
+    template <class... Args>
+    inline auto xview<CT, S...>::unchecked(Args... args) -> reference
+    {
+        // No bounds or dimension checks here by design (see warnings above).
+        return unchecked_impl(make_index_sequence(args...), static_cast<size_type>(args)...);
+    }
+
+    // Returns a reference to the element designated by the index range
+    // [first, last), translated through the view's slices into an index on
+    // the underlying expression.
+    template <class CT, class... S>
+    template <class It>
+    inline auto xview<CT, S...>::element(It first, It last) -> reference
+    {
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        // TODO: avoid memory allocation
+        auto index = make_index(first, last);
+        return m_e.element(index.cbegin(), index.cend());
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the view.
+     * @param args a list of indices specifying the position in the view. Indices must be
+     * unsigned integers, the number of indices should be equal or greater than the number
+     * of dimensions of the view.
+     */
+    template <class CT, class... S>
+    template <class... Args>
+    inline auto xview<CT, S...>::operator()(Args... args) const -> const_reference
+    {
+        // Index and dimension validation; presumably compiled to no-ops unless
+        // xtensor's checking macros are enabled -- TODO confirm in xexception.hpp.
+        XTENSOR_TRY(check_index(shape(), args...));
+        XTENSOR_CHECK_DIMENSION(shape(), args...);
+        // The static cast prevents the compiler from instantiating the template methods with signed integers,
+        // leading to warning about signed/unsigned conversions in the deeper layers of the access methods
+        return access(static_cast<size_type>(args)...);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the view.
+     * @param args a list of indices specifying the position in the view. Indices
+     * must be unsigned integers, the number of indices must be equal to the number of
+     * dimensions of the view, else the behavior is undefined.
+     *
+     * @warning This method is meant for performance, for expressions with a dynamic
+     * number of dimensions (i.e. not known at compile time). Since it may have
+     * undefined behavior (see parameters), operator() should be preferred whenever
+     * it is possible.
+     * @warning This method is NOT compatible with broadcasting, meaning the following
+     * code has undefined behavior:
+     * @code{.cpp}
+     * xt::xarray<double> a = {{0, 1}, {2, 3}};
+     * xt::xarray<double> b = {0, 1};
+     * auto fd = a + b;
+     * double res = fd.unchecked(0, 1);
+     * @endcode
+     */
+    template <class CT, class... S>
+    template <class... Args>
+    inline auto xview<CT, S...>::unchecked(Args... args) const -> const_reference
+    {
+        // Delegates to unchecked_impl with an index sequence sized for the
+        // underlying expression (see make_index_sequence), no bounds checking.
+        return unchecked_impl(make_index_sequence(args...), static_cast<size_type>(args)...);
+    }
+
+    /**
+     * Returns a constant reference to the element at the specified position in the view,
+     * where the position is given as a sequence of indices.
+     * @param first iterator starting the sequence of indices
+     * @param last iterator ending the sequence of indices
+     */
+    template <class CT, class... S>
+    template <class It>
+    inline auto xview<CT, S...>::element(It first, It last) const -> const_reference
+    {
+        // Consistency fix: perform the same (optional) bounds checking as the
+        // non-const overload; this is a no-op unless checking is enabled.
+        XTENSOR_TRY(check_element_index(shape(), first, last));
+        // TODO: avoid memory allocation
+        auto index = make_index(first, last);
+        return m_e.element(index.cbegin(), index.cend());
+    }
+
+    /**
+     * Returns a reference to the underlying expression of the view
+     * (the expression the view was built on, stored as m_e).
+     */
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::expression() noexcept -> xexpression_type&
+    {
+        return m_e;
+    }
+
+    /**
+     * Returns a const reference to the underlying expression of the view.
+     */
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::expression() const noexcept -> const xexpression_type&
+    {
+        return m_e;
+    }
+
+    /**
+     * Returns the data holder of the underlying container (only if the view is on a realized
+     * container). ``xt::eval`` will make sure that the underlying xexpression is
+     * on a realized container.
+     */
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::storage() -> std::enable_if_t<has_data_interface<T>::value, storage_type&>
+    {
+        return m_e.storage();
+    }
+
+    /**
+     * Returns a constant reference to the data holder of the underlying container
+     * (only if the view is on a realized container).
+     */
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::storage() const
+        -> std::enable_if_t<has_data_interface<T>::value, const storage_type&>
+    {
+        return m_e.storage();
+    }
+
+    // Linear (flat) iteration over the underlying storage buffer. These overloads
+    // are enabled only when the underlying expression exposes a data interface and
+    // the view is strided; iteration starts at data_offset() into the raw storage
+    // and spans this->size() elements. Reverse variants wrap the forward iterators.
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_begin()
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, linear_iterator>
+    {
+        return m_e.storage().begin() + data_offset();
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_end()
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, linear_iterator>
+    {
+        return m_e.storage().begin() + data_offset() + this->size();
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_begin() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_linear_iterator>
+    {
+        return linear_cbegin();
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_end() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_linear_iterator>
+    {
+        return linear_cend();
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_cbegin() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_linear_iterator>
+    {
+        return m_e.storage().cbegin() + data_offset();
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_cend() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_linear_iterator>
+    {
+        return m_e.storage().cbegin() + data_offset() + this->size();
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_rbegin()
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, reverse_linear_iterator>
+    {
+        return reverse_linear_iterator(linear_end());
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_rend()
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, reverse_linear_iterator>
+    {
+        return reverse_linear_iterator(linear_begin());
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_rbegin() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_reverse_linear_iterator>
+    {
+        return linear_crbegin();
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_rend() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_reverse_linear_iterator>
+    {
+        return linear_crend();
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_crbegin() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_reverse_linear_iterator>
+    {
+        return const_reverse_linear_iterator(linear_end());
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    auto xview<CT, S...>::linear_crend() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_reverse_linear_iterator>
+    {
+        return const_reverse_linear_iterator(linear_begin());
+    }
+
+    /**
+     * Return the strides for the underlying container of the view.
+     * Strides are computed lazily on first access and cached
+     * (m_strides_computed guards against recomputation).
+     */
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::strides() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const inner_strides_type&>
+    {
+        if (!m_strides_computed)
+        {
+            compute_strides(std::integral_constant<bool, has_trivial_strides>{});
+            m_strides_computed = true;
+        }
+        return m_strides;
+    }
+
+    /**
+     * Return the backstrides for the underlying container of the view.
+     * Computed lazily together with the strides.
+     */
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::backstrides() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const inner_strides_type&>
+    {
+        if (!m_strides_computed)
+        {
+            compute_strides(std::integral_constant<bool, has_trivial_strides>{});
+            m_strides_computed = true;
+        }
+        return m_backstrides;
+    }
+
+    /**
+     * Return the pointer to the underlying buffer.
+     * Note: this is the buffer of the underlying expression; the view's first
+     * element lives at data() + data_offset().
+     */
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::data() const
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, const_pointer>
+    {
+        return m_e.data();
+    }
+
+    /**
+     * Return the pointer to the underlying buffer (mutable overload).
+     */
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::data()
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, pointer>
+    {
+        return m_e.data();
+    }
+
+    // Computes the linear offset of the view's origin into the underlying buffer.
+    // temp[i] is the first index selected by the i-th slice (xt::value(slice, 0)).
+    template <class CT, class... S>
+    template <std::size_t... I>
+    inline std::size_t xview<CT, S...>::data_offset_impl(std::index_sequence<I...>) const noexcept
+    {
+        auto temp = std::array<std::ptrdiff_t, sizeof...(S)>(
+            {(static_cast<ptrdiff_t>(xt::value(std::get<I>(m_slices), 0)))...}
+        );
+
+        std::ptrdiff_t result = 0;
+        std::size_t i = 0;
+        // Slices that map to an underlying dimension: multiply the slice origin by
+        // the underlying stride, skipping dimensions inserted by newaxis slices.
+        for (; i < std::min(sizeof...(S), m_e.strides().size()); ++i)
+        {
+            result += temp[i] * m_e.strides()[i - newaxis_count_before<S...>(i)];
+        }
+        // Remaining slices beyond the underlying dimensions contribute their raw
+        // origin value -- presumably always 0 for trailing newaxis; TODO confirm.
+        for (; i < sizeof...(S); ++i)
+        {
+            result += temp[i];
+        }
+        return static_cast<std::size_t>(result) + m_e.data_offset();
+    }
+
+    /**
+     * Return the offset to the first element of the view in the underlying container.
+     * Triggers the lazy stride/offset computation on first access.
+     */
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::data_offset() const noexcept
+        -> std::enable_if_t<has_data_interface<T>::value && is_strided_view, std::size_t>
+    {
+        if (!m_strides_computed)
+        {
+            compute_strides(std::integral_constant<bool, has_trivial_strides>{});
+            m_strides_computed = true;
+        }
+        return m_data_offset;
+    }
+
+    //@}
+
+    // Returns the extent of the *underlying* expression along dimension dim
+    // (not the view's own shape).
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::underlying_size(size_type dim) const -> size_type
+    {
+        return m_e.shape()[dim];
+    }
+
+    // Address-of overloads: wrap the view in a closure pointer that preserves the
+    // value category (lvalue, const lvalue, rvalue) of the view expression.
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::operator&() & -> xtl::xclosure_pointer<self_type&>
+    {
+        return xtl::closure_pointer(*this);
+    }
+
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::operator&() const& -> xtl::xclosure_pointer<const self_type&>
+    {
+        return xtl::closure_pointer(*this);
+    }
+
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::operator&() && -> xtl::xclosure_pointer<self_type>
+    {
+        return xtl::closure_pointer(std::move(*this));
+    }
+
+    /**
+     * @name Broadcasting
+     */
+    //@{
+    /**
+     * Broadcast the shape of the view to the specified parameter.
+     * @param shape the result shape
+     * @param reuse_cache parameter for internal optimization (ignored here)
+     * @return a boolean indicating whether the broadcasting is trivial
+     */
+    template <class CT, class... S>
+    template <class ST>
+    inline bool xview<CT, S...>::broadcast_shape(ST& shape, bool) const
+    {
+        return xt::broadcast_shape(m_shape, shape);
+    }
+
+    /**
+     * Checks whether the xview can be linearly assigned to an expression
+     * with the specified strides.
+     * @return a boolean indicating whether a linear assign is possible
+     */
+    template <class CT, class... S>
+    template <class ST>
+    inline bool xview<CT, S...>::has_linear_assign(const ST& str) const
+    {
+        // Compile-time dispatch: only strided views can ever be linearly
+        // assigned; for them, the strides must match element-wise.
+        return xtl::mpl::static_if<is_strided_view>(
+            [&](auto self)
+            {
+                return str.size() == self(this)->strides().size()
+                       && std::equal(str.cbegin(), str.cend(), self(this)->strides().begin());
+            },
+            /*else*/
+            [](auto /*self*/)
+            {
+                return false;
+            }
+        );
+    }
+
+    //@}
+
+    // Shifts a raw-buffer iterator to the view's origin.
+    template <class CT, class... S>
+    template <class It>
+    inline It xview<CT, S...>::data_xbegin_impl(It begin) const noexcept
+    {
+        return begin + data_offset();
+    }
+
+    // Computes the one-past-the-end raw-buffer iterator for the given layout.
+    template <class CT, class... S>
+    template <class It>
+    inline It xview<CT, S...>::data_xend_impl(It begin, layout_type l, size_type offset) const noexcept
+    {
+        return strided_data_end(*this, begin, l, offset);
+    }
+
+    // Raw-buffer iterator pair over the view's data, used by the stepper API below.
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::data_xbegin() noexcept -> container_iterator
+    {
+        return data_xbegin_impl(data());
+    }
+
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::data_xbegin() const noexcept -> const_container_iterator
+    {
+        return data_xbegin_impl(data());
+    }
+
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::data_xend(layout_type l, size_type offset) noexcept -> container_iterator
+    {
+        return data_xend_impl(data() + data_offset(), l, offset);
+    }
+
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::data_xend(layout_type l, size_type offset) const noexcept
+        -> const_container_iterator
+    {
+        return data_xend_impl(data() + data_offset(), l, offset);
+    }
+
+    // Assign-to operation, enabled for contiguous views only: resizes the
+    // destination to the view's shape and copies the raw buffer range in bulk.
+    template <class CT, class... S>
+    template <class E, class T, class>
+    void xview<CT, S...>::assign_to(xexpression<E>& e, bool force_resize) const
+    {
+        auto& de = e.derived_cast();
+        de.resize(shape(), force_resize);
+        std::copy(data() + data_offset(), data() + data_offset() + de.size(), de.template begin<static_layout>());
+    }
+
+    // Rebinds this view's slices onto a different expression e, producing a view
+    // of the same kind (rebind_t<E>) over e.
+    template <class CT, class... S>
+    template <class E, std::size_t... I>
+    inline auto xview<CT, S...>::build_view_impl(E&& e, std::index_sequence<I...>) const -> rebind_t<E>
+    {
+        return rebind_t<E>(std::forward<E>(e), std::get<I>(m_slices)...);
+    }
+
+    template <class CT, class... S>
+    template <class E>
+    inline auto xview<CT, S...>::build_view(E&& e) const -> rebind_t<E>
+    {
+        return build_view_impl(std::forward<E>(e), std::make_index_sequence<sizeof...(S)>());
+    }
+
+    // SIMD store/load: forwarded to the underlying expression at data_offset() + i,
+    // always in unaligned mode -- presumably because the view's offset gives no
+    // alignment guarantee; TODO confirm.
+    template <class CT, class... S>
+    template <class align, class simd, class T>
+    inline auto xview<CT, S...>::store_simd(size_type i, const simd& e) -> enable_simd_interface<T, void>
+    {
+        return m_e.template store_simd<xt_simd::unaligned_mode>(data_offset() + i, e);
+    }
+
+    template <class CT, class... S>
+    template <class align, class requested_type, std::size_t N, class T>
+    inline auto xview<CT, S...>::load_simd(size_type i) const
+        -> enable_simd_interface<T, simd_return_type<requested_type>>
+    {
+        return m_e.template load_simd<xt_simd::unaligned_mode, requested_type>(data_offset() + i);
+    }
+
+    // Flat element access into the underlying expression, shifted by the view offset.
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::data_element(size_type i) -> enable_simd_interface<T, reference>
+    {
+        return m_e.data_element(data_offset() + i);
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::data_element(size_type i) const -> enable_simd_interface<T, const_reference>
+    {
+        return m_e.data_element(data_offset() + i);
+    }
+
+    // Flat (1-D) element access; only valid when the view is contiguous, which is
+    // asserted in debug builds.
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::flat(size_type i) -> enable_simd_interface<T, reference>
+    {
+        XTENSOR_ASSERT(is_contiguous());
+        return m_e.flat(data_offset() + i);
+    }
+
+    template <class CT, class... S>
+    template <class T>
+    inline auto xview<CT, S...>::flat(size_type i) const -> enable_simd_interface<T, const_reference>
+    {
+        XTENSOR_ASSERT(is_contiguous());
+        return m_e.flat(data_offset() + i);
+    }
+
+    // Builds the index sequence used to expand indices for the underlying
+    // expression: #args plus the integral (dimension-dropping) slices, minus the
+    // newaxis (dimension-adding) slices, clamped at zero.
+    template <class CT, class... S>
+    template <class... Args>
+    inline auto xview<CT, S...>::make_index_sequence(Args...) const noexcept
+    {
+        return std::make_index_sequence<
+            (sizeof...(Args) + integral_count<S...>() > newaxis_count<S...>()
+                 ? sizeof...(Args) + integral_count<S...>() - newaxis_count<S...>()
+                 : 0)>();
+    }
+
+    // Computes the view stride for each non-integral slice: the slice's step size
+    // multiplied by the corresponding underlying stride (or 1 when the slice maps
+    // past the original dimensions, i.e. was introduced by newaxis).
+    template <class CT, class... S>
+    template <std::size_t... I>
+    inline auto xview<CT, S...>::compute_strides_impl(std::index_sequence<I...>) const noexcept
+    {
+        std::size_t original_dim = m_e.dimension();
+        return std::array<std::ptrdiff_t, sizeof...(I)>(
+            {(static_cast<std::ptrdiff_t>(xt::step_size(std::get<integral_skip<S...>(I)>(m_slices), 1))
+              * ((integral_skip<S...>(I) - newaxis_count_before<S...>(integral_skip<S...>(I))) < original_dim
+                     ? m_e.strides()[integral_skip<S...>(I) - newaxis_count_before<S...>(integral_skip<S...>(I))]
+                     : 1))...}
+        );
+    }
+
+    // Non-trivial case: fills m_strides / m_backstrides from the slices, copies
+    // the underlying strides for the trailing (unsliced) dimensions, and caches
+    // the view's data offset.
+    template <class CT, class... S>
+    inline void xview<CT, S...>::compute_strides(std::false_type) const
+    {
+        m_strides = xtl::make_sequence<inner_strides_type>(this->dimension(), 0);
+        m_backstrides = xtl::make_sequence<inner_strides_type>(this->dimension(), 0);
+
+        constexpr std::size_t n_strides = sizeof...(S) - integral_count<S...>();
+
+        auto slice_strides = compute_strides_impl(std::make_index_sequence<n_strides>());
+
+        for (std::size_t i = 0; i < n_strides; ++i)
+        {
+            m_strides[i] = slice_strides[i];
+            // adapt strides for shape[i] == 1 to make consistent with rest of xtensor
+            detail::adapt_strides(shape(), m_strides, &m_backstrides, i);
+        }
+        for (std::size_t i = n_strides; i < this->dimension(); ++i)
+        {
+            m_strides[i] = m_e.strides()[i + integral_count<S...>() - newaxis_count<S...>()];
+            detail::adapt_strides(shape(), m_strides, &m_backstrides, i);
+        }
+
+        m_data_offset = data_offset_impl(std::make_index_sequence<sizeof...(S)>());
+    }
+
+    // Trivial case (has_trivial_strides): nothing to compute here -- presumably
+    // the stride members are set up elsewhere (e.g. at construction); confirm.
+    template <class CT, class... S>
+    inline void xview<CT, S...>::compute_strides(std::true_type) const
+    {
+    }
+
+    // Access helpers backing operator(). When more indices are supplied than the
+    // view has dimensions, the leading (most significant) indices are dropped by
+    // recursing without the first argument.
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::access() -> reference
+    {
+        return access_impl(make_index_sequence());
+    }
+
+    template <class CT, class... S>
+    template <class Arg, class... Args>
+    inline auto xview<CT, S...>::access(Arg arg, Args... args) -> reference
+    {
+        if (sizeof...(Args) >= this->dimension())
+        {
+            return access(args...);
+        }
+        return access_impl(make_index_sequence(arg, args...), arg, args...);
+    }
+
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::access() const -> const_reference
+    {
+        return access_impl(make_index_sequence());
+    }
+
+    template <class CT, class... S>
+    template <class Arg, class... Args>
+    inline auto xview<CT, S...>::access(Arg arg, Args... args) const -> const_reference
+    {
+        if (sizeof...(Args) >= this->dimension())
+        {
+            return access(args...);
+        }
+        return access_impl(make_index_sequence(arg, args...), arg, args...);
+    }
+
+    // Expansion helpers: translate each underlying dimension I through index<I>()
+    // (which applies the corresponding slice) and forward the resulting indices to
+    // the underlying expression's unchecked() / operator().
+    template <class CT, class... S>
+    template <typename std::decay_t<CT>::size_type... I, class... Args>
+    inline auto xview<CT, S...>::unchecked_impl(std::index_sequence<I...>, Args... args) -> reference
+    {
+        return m_e.unchecked(index<I>(args...)...);
+    }
+
+    template <class CT, class... S>
+    template <typename std::decay_t<CT>::size_type... I, class... Args>
+    inline auto xview<CT, S...>::unchecked_impl(std::index_sequence<I...>, Args... args) const
+        -> const_reference
+    {
+        return m_e.unchecked(index<I>(args...)...);
+    }
+
+    template <class CT, class... S>
+    template <typename std::decay_t<CT>::size_type... I, class... Args>
+    inline auto xview<CT, S...>::access_impl(std::index_sequence<I...>, Args... args) -> reference
+    {
+        return m_e(index<I>(args...)...);
+    }
+
+    template <class CT, class... S>
+    template <typename std::decay_t<CT>::size_type... I, class... Args>
+    inline auto xview<CT, S...>::access_impl(std::index_sequence<I...>, Args... args) const -> const_reference
+    {
+        return m_e(index<I>(args...)...);
+    }
+
+    // Maps underlying dimension I to an index value. If I falls within the sliced
+    // dimensions (lesser_condition), the matching slice transforms the caller's
+    // argument; otherwise the argument is forwarded unchanged, with the position
+    // adjusted for dropped (integral) and inserted (newaxis) dimensions.
+    template <class CT, class... S>
+    template <typename std::decay_t<CT>::size_type I, class... Args>
+    inline auto xview<CT, S...>::index(Args... args) const
+        -> std::enable_if_t<lesser_condition<I>::value, size_type>
+    {
+        return sliced_access<I - integral_count_before<S...>(I) + newaxis_count_before<S...>(I + 1)>(
+            std::get<I + newaxis_count_before<S...>(I + 1)>(m_slices),
+            args...
+        );
+    }
+
+    template <class CT, class... S>
+    template <typename std::decay_t<CT>::size_type I, class... Args>
+    inline auto xview<CT, S...>::index(Args... args) const
+        -> std::enable_if_t<!lesser_condition<I>::value, size_type>
+    {
+        return argument<I - integral_count<S...>() + newaxis_count<S...>()>(args...);
+    }
+
+    // With no caller indices, a slice contributes its origin (slice(0)).
+    template <class CT, class... S>
+    template <typename std::decay_t<CT>::size_type I, class T>
+    inline auto xview<CT, S...>::sliced_access(const xslice<T>& slice) const -> size_type
+    {
+        return static_cast<size_type>(slice.derived_cast()(0));
+    }
+
+    // With caller indices, the slice maps the I-th argument to the underlying index.
+    template <class CT, class... S>
+    template <typename std::decay_t<CT>::size_type I, class T, class Arg, class... Args>
+    inline auto xview<CT, S...>::sliced_access(const xslice<T>& slice, Arg arg, Args... args) const -> size_type
+    {
+        using ST = typename T::size_type;
+        return static_cast<size_type>(
+            slice.derived_cast()(argument<I>(static_cast<ST>(arg), static_cast<ST>(args)...))
+        );
+    }
+
+    // Non-slice case: an integral "squeeze" value selects a fixed underlying index
+    // regardless of the caller's arguments.
+    template <class CT, class... S>
+    template <typename std::decay_t<CT>::size_type I, class T, class... Args>
+    inline auto xview<CT, S...>::sliced_access(const T& squeeze, Args...) const -> disable_xslice<T, size_type>
+    {
+        return static_cast<size_type>(squeeze);
+    }
+
+    // Builds a full index into the underlying expression from a (possibly shorter)
+    // sequence of view indices [first, last). The provided indices are right-
+    // aligned against the view's dimensions (first_copy = last - s); dimensions
+    // with no corresponding input fall back to the slice origin (xt::value(s, 0)).
+    template <class CT, class... S>
+    template <class It>
+    inline auto xview<CT, S...>::make_index(It first, It last) const -> base_index_type
+    {
+        auto index = xtl::make_sequence<base_index_type>(m_e.dimension(), 0);
+        using diff_type = typename std::iterator_traits<It>::difference_type;
+        using ivalue_type = typename base_index_type::value_type;
+        // func1: resolve a slice against the current caller-provided index.
+        auto func1 = [&first](const auto& s) noexcept
+        {
+            return get_slice_value(s, first);
+        };
+        // func2: fall back to the slice's first element when no index was given.
+        auto func2 = [](const auto& s) noexcept
+        {
+            return xt::value(s, 0);
+        };
+
+        auto s = static_cast<diff_type>(
+            (std::min)(static_cast<size_type>(std::distance(first, last)), this->dimension())
+        );
+        auto first_copy = last - s;
+        for (size_type i = 0; i != m_e.dimension(); ++i)
+        {
+            // k: view-dimension position for underlying dimension i, skipping newaxis.
+            size_type k = newaxis_skip<S...>(i);
+
+            // need to advance captured `first`
+            first = first_copy;
+            std::advance(first, static_cast<diff_type>(k - xt::integral_count_before<S...>(i)));
+
+            if (first < last)
+            {
+                index[i] = k < sizeof...(S) ? apply<size_type>(k, func1, m_slices)
+                                            : static_cast<ivalue_type>(*first);
+            }
+            else
+            {
+                index[i] = k < sizeof...(S) ? apply<size_type>(k, func2, m_slices) : ivalue_type(0);
+            }
+        }
+        return index;
+    }
+
+    // Trivial case: the view has the same shape as the underlying expression.
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::compute_shape(std::true_type) const -> inner_shape_type
+    {
+        return inner_shape_type(m_e.shape());
+    }
+
+    // General case: integral slices drop a dimension, newaxis slices add one;
+    // sliced dimensions take the slice's size, the rest keep the underlying extent.
+    template <class CT, class... S>
+    inline auto xview<CT, S...>::compute_shape(std::false_type) const -> inner_shape_type
+    {
+        std::size_t dim = m_e.dimension() - integral_count<S...>() + newaxis_count<S...>();
+        auto shape = xtl::make_sequence<inner_shape_type>(dim, 0);
+        auto func = [](const auto& s) noexcept
+        {
+            return get_size(s);
+        };
+        for (size_type i = 0; i != dim; ++i)
+        {
+            size_type index = integral_skip<S...>(i);
+            shape[i] = index < sizeof...(S) ? apply<size_type>(index, func, m_slices)
+                                            : m_e.shape()[index - newaxis_count_before<S...>(index)];
+        }
+        return shape;
+    }
+
+    namespace xview_detail
+    {
+        // Fast path: SIMD-capable strided loop assignment.
+        template <class V, class T>
+        inline void run_assign_temporary_impl(V& v, const T& t, std::true_type /* enable strided assign */)
+        {
+            strided_loop_assigner<true>::run(v, t);
+        }
+
+        // Fallback: element-wise copy through the view's iterators.
+        template <class V, class T>
+        inline void
+        run_assign_temporary_impl(V& v, const T& t, std::false_type /* fallback to iterator assign */)
+        {
+            std::copy(t.cbegin(), t.cend(), v.begin());
+        }
+    }
+
+    // Assigns a temporary to this view, choosing at compile time between the
+    // strided SIMD assignment path and the generic iterator-based copy.
+    template <class CT, class... S>
+    inline void xview<CT, S...>::assign_temporary_impl(temporary_type&& tmp)
+    {
+        constexpr bool fast_assign = detail::is_strided_view<xexpression_type, S...>::value
+                                     && xassign_traits<xview<CT, S...>, temporary_type>::simd_strided_assign();
+        xview_detail::run_assign_temporary_impl(*this, tmp, std::integral_constant<bool, fast_assign>{});
+    }
+
+    namespace detail
+    {
+        // Maps the I-th slice position to the corresponding dimension of the
+        // underlying expression by skipping dimensions inserted by newaxis slices.
+        template <class E, class... S>
+        inline std::size_t get_underlying_shape_index(std::size_t I)
+        {
+            return I - newaxis_count_before<get_slice_type<E, S>...>(I);
+        }
+
+        // Compile-time validation of a slice pack: rejects xt::ellipsis(), which
+        // xview does not support (use strided/dynamic views instead).
+        template <class... S>
+        struct check_slice;
+
+        template <>
+        struct check_slice<>
+        {
+            using type = void_t<>;
+        };
+
+        template <class S, class... SL>
+        struct check_slice<S, SL...>
+        {
+            // Fixed typo in the diagnostic message ("vith" -> "with").
+            static_assert(!std::is_same<S, xellipsis_tag>::value, "ellipsis not supported with xview");
+            using type = typename check_slice<SL...>::type;
+        };
+
+        // Instantiates the xview type for expression e, converting each caller
+        // slice to its concrete slice type and resolving it against the matching
+        // underlying dimension.
+        template <class E, std::size_t... I, class... S>
+        inline auto make_view_impl(E&& e, std::index_sequence<I...>, S&&... slices)
+        {
+            // Checks that no ellipsis slice is used
+            using view_type = xview<xtl::closure_type_t<E>, get_slice_type<std::decay_t<E>, S>...>;
+            return view_type(
+                std::forward<E>(e),
+                get_slice_implementation(
+                    e,
+                    std::forward<S>(slices),
+                    get_underlying_shape_index<std::decay_t<E>, S...>(I)
+                )...
+            );
+        }
+    }
+
+    /**
+     * Constructs and returns a view on the specified xexpression. Users
+     * should not directly construct the slices but call helper functions
+     * instead.
+     * @param e the xexpression to adapt
+     * @param slices the slices list describing the view. \c view accepts negative
+     * indices, in that case indexing is done in reverse order.
+     * @return an xview on \c e
+     * @sa range, all, newaxis
+     */
+    template <class E, class... S>
+    inline auto view(E&& e, S&&... slices)
+    {
+        return detail::make_view_impl(
+            std::forward<E>(e),
+            std::make_index_sequence<sizeof...(S)>(),
+            std::forward<S>(slices)...
+        );
+    }
+
+    namespace detail
+    {
+        // Helper backing xt::row(): validates that the expression is 2-D, then
+        // builds view(e, index, all()).
+        class row_impl
+        {
+        public:
+
+            template <class E>
+            inline static auto make(E&& e, const std::ptrdiff_t index)
+            {
+                const auto shape = e.shape();
+                check_dimension(shape);
+                return view(e, index, xt::all());
+            }
+
+        private:
+
+            // Runtime check for dynamically-shaped expressions: throws unless
+            // the expression has exactly two dimensions.
+            template <class S>
+            inline static void check_dimension(const S& shape)
+            {
+                if (shape.size() != 2)
+                {
+                    XTENSOR_THROW(
+                        std::invalid_argument,
+                        "A row can only be accessed on an expression with exact two dimensions"
+                    );
+                }
+            }
+
+            // Compile-time check for statically-shaped expressions.
+            template <class T, std::size_t N>
+            inline static void check_dimension(const std::array<T, N>&)
+            {
+                static_assert(N == 2, "A row can only be accessed on an expression with exact two dimensions");
+            }
+        };
+
+        // Helper backing xt::col(): validates that the expression is 2-D, then
+        // builds view(e, all(), index).
+        class column_impl
+        {
+        public:
+
+            template <class E>
+            inline static auto make(E&& e, const std::ptrdiff_t index)
+            {
+                const auto shape = e.shape();
+                check_dimension(shape);
+                return view(e, xt::all(), index);
+            }
+
+        private:
+
+            // Runtime check for dynamically-shaped expressions: throws unless
+            // the expression has exactly two dimensions.
+            template <class S>
+            inline static void check_dimension(const S& shape)
+            {
+                if (shape.size() != 2)
+                {
+                    XTENSOR_THROW(
+                        std::invalid_argument,
+                        "A column can only be accessed on an expression with exact two dimensions"
+                    );
+                }
+            }
+
+            // Compile-time check for statically-shaped expressions.
+            template <class T, std::size_t N>
+            inline static void check_dimension(const std::array<T, N>&)
+            {
+                static_assert(N == 2, "A column can only be accessed on an expression with exact two dimensions");
+            }
+        };
+    }
+
+    /**
+     * Constructs and returns a row (sliced view) on the specified expression.
+     * Users should not directly construct the slices but call helper functions
+     * instead. This function is only allowed on expressions with two dimensions.
+     * @param e the xexpression to adapt
+     * @param index 0-based index of the row, negative indices will return the
+     * last rows in reverse order.
+     * @throws std::invalid_argument if the expression does not have exactly
+     * 2 dimensions (the check is `shape.size() != 2`).
+     */
+    template <class E>
+    inline auto row(E&& e, std::ptrdiff_t index)
+    {
+        return detail::row_impl::make(e, index);
+    }
+
+    /**
+     * Constructs and returns a column (sliced view) on the specified expression.
+     * Users should not directly construct the slices but call helper functions
+     * instead. This function is only allowed on expressions with two dimensions.
+     * @param e the xexpression to adapt
+     * @param index 0-based index of the column, negative indices will return the
+     * last columns in reverse order.
+     * @throws std::invalid_argument if the expression does not have exactly
+     * 2 dimensions (the check is `shape.size() != 2`).
+     */
+    template <class E>
+    inline auto col(E&& e, std::ptrdiff_t index)
+    {
+        return detail::column_impl::make(e, index);
+    }
+
+    /***************
+     * stepper api *
+     ***************/
+
+    // Generic stepper path (Enable == false — presumably selected when the
+    // view cannot expose a raw data interface; confirm against the xview
+    // class declaration). The view's steppers wrap the steppers of the
+    // underlying expression m_e. `offset` is the number of extra leading
+    // dimensions of the broadcast iteration shape relative to the view.
+    template <class CT, class... S>
+    template <class ST, bool Enable>
+    inline auto xview<CT, S...>::stepper_begin(const ST& shape) -> std::enable_if_t<!Enable, stepper>
+    {
+        size_type offset = shape.size() - this->dimension();
+        return stepper(this, m_e.stepper_begin(m_e.shape()), offset);
+    }
+
+    template <class CT, class... S>
+    template <class ST, bool Enable>
+    inline auto xview<CT, S...>::stepper_end(const ST& shape, layout_type l)
+        -> std::enable_if_t<!Enable, stepper>
+    {
+        // The trailing (true, l) arguments mark this as an end stepper for
+        // the requested layout.
+        size_type offset = shape.size() - this->dimension();
+        return stepper(this, m_e.stepper_end(m_e.shape(), l), offset, true, l);
+    }
+
+    template <class CT, class... S>
+    template <class ST, bool Enable>
+    inline auto xview<CT, S...>::stepper_begin(const ST& shape) const
+        -> std::enable_if_t<!Enable, const_stepper>
+    {
+        size_type offset = shape.size() - this->dimension();
+        // Bind through a const reference so the const overload of
+        // stepper_begin on the underlying expression is selected.
+        const xexpression_type& e = m_e;
+        return const_stepper(this, e.stepper_begin(m_e.shape()), offset);
+    }
+
+    template <class CT, class... S>
+    template <class ST, bool Enable>
+    inline auto xview<CT, S...>::stepper_end(const ST& shape, layout_type l) const
+        -> std::enable_if_t<!Enable, const_stepper>
+    {
+        size_type offset = shape.size() - this->dimension();
+        // Bind through a const reference so the const overload of
+        // stepper_end on the underlying expression is selected.
+        const xexpression_type& e = m_e;
+        return const_stepper(this, e.stepper_end(m_e.shape(), l), offset, true, l);
+    }
+
+    // Data-interface stepper path (Enable == true — presumably selected when
+    // the view can expose raw data through data_xbegin/data_xend; confirm
+    // against the xview class declaration). Steppers are built directly from
+    // the data iterators instead of wrapping the underlying expression's
+    // steppers.
+    template <class CT, class... S>
+    template <class ST, bool Enable>
+    inline auto xview<CT, S...>::stepper_begin(const ST& shape) -> std::enable_if_t<Enable, stepper>
+    {
+        size_type offset = shape.size() - this->dimension();
+        return stepper(this, data_xbegin(), offset);
+    }
+
+    template <class CT, class... S>
+    template <class ST, bool Enable>
+    inline auto xview<CT, S...>::stepper_end(const ST& shape, layout_type l)
+        -> std::enable_if_t<Enable, stepper>
+    {
+        size_type offset = shape.size() - this->dimension();
+        return stepper(this, data_xend(l, offset), offset);
+    }
+
+    template <class CT, class... S>
+    template <class ST, bool Enable>
+    inline auto xview<CT, S...>::stepper_begin(const ST& shape) const
+        -> std::enable_if_t<Enable, const_stepper>
+    {
+        size_type offset = shape.size() - this->dimension();
+        return const_stepper(this, data_xbegin(), offset);
+    }
+
+    template <class CT, class... S>
+    template <class ST, bool Enable>
+    inline auto xview<CT, S...>::stepper_end(const ST& shape, layout_type l) const
+        -> std::enable_if_t<Enable, const_stepper>
+    {
+        size_type offset = shape.size() - this->dimension();
+        return const_stepper(this, data_xend(l, offset), offset);
+    }
+
+    /********************************
+     * xview_stepper implementation *
+     ********************************/
+
+    // Builds a stepper over a view. For a begin stepper (end == false), the
+    // wrapped substepper is advanced to the first element selected by each
+    // slice, so that it starts at the view's origin inside the underlying
+    // expression. For an end stepper, to_end_impl positions it past-the-end
+    // according to the requested layout.
+    template <bool is_const, class CT, class... S>
+    inline xview_stepper<is_const, CT, S...>::xview_stepper(
+        view_type* view,
+        substepper_type it,
+        size_type offset,
+        bool end,
+        layout_type l
+    )
+        : p_view(view)
+        , m_it(it)
+        , m_offset(offset)
+    {
+        if (!end)
+        {
+            std::fill(m_index_keeper.begin(), m_index_keeper.end(), 0);
+            // First value covered by a slice, i.e. the offset of the view's
+            // origin along the corresponding underlying dimension.
+            auto func = [](const auto& s) noexcept
+            {
+                return xt::value(s, 0);
+            };
+            for (size_type i = 0; i < sizeof...(S); ++i)
+            {
+                if (!is_newaxis_slice(i))
+                {
+                    size_type s = apply<size_type>(i, func, p_view->slices());
+                    // Map the slice position to the underlying dimension;
+                    // newaxis slices add no underlying dimension.
+                    size_type index = i - newaxis_count_before<S...>(i);
+                    m_it.step(index, s);
+                }
+            }
+        }
+        else
+        {
+            to_end_impl(l);
+        }
+    }
+
+    // Dereferences the wrapped substepper; it is already positioned on the
+    // correct element of the underlying expression.
+    template <bool is_const, class CT, class... S>
+    inline auto xview_stepper<is_const, CT, S...>::operator*() const -> reference
+    {
+        return *m_it;
+    }
+
+    template <bool is_const, class CT, class... S>
+    inline void xview_stepper<is_const, CT, S...>::step(size_type dim)
+    {
+        // Forward a single step to the wrapped stepper; the slice-aware
+        // index/offset translation is handled by common_step_forward.
+        common_step_forward(
+            dim,
+            [this](size_type index, size_type offset)
+            {
+                m_it.step(index, offset);
+            }
+        );
+    }
+
+    template <bool is_const, class CT, class... S>
+    inline void xview_stepper<is_const, CT, S...>::step_back(size_type dim)
+    {
+        // Forward a single backward step to the wrapped stepper; the
+        // slice-aware translation is handled by common_step_backward.
+        common_step_backward(
+            dim,
+            [this](size_type index, size_type offset)
+            {
+                m_it.step_back(index, offset);
+            }
+        );
+    }
+
+    template <bool is_const, class CT, class... S>
+    inline void xview_stepper<is_const, CT, S...>::step(size_type dim, size_type n)
+    {
+        // Forward an n-element step to the wrapped stepper; the slice-aware
+        // translation is handled by common_step_forward.
+        common_step_forward(
+            dim,
+            n,
+            [this](size_type index, size_type offset)
+            {
+                m_it.step(index, offset);
+            }
+        );
+    }
+
+    template <bool is_const, class CT, class... S>
+    inline void xview_stepper<is_const, CT, S...>::step_back(size_type dim, size_type n)
+    {
+        // Forward an n-element backward step to the wrapped stepper; the
+        // slice-aware translation is handled by common_step_backward.
+        common_step_backward(
+            dim,
+            n,
+            [this](size_type index, size_type offset)
+            {
+                m_it.step_back(index, offset);
+            }
+        );
+    }
+
+    template <bool is_const, class CT, class... S>
+    inline void xview_stepper<is_const, CT, S...>::reset(size_type dim)
+    {
+        // Rewind the given dimension to its first position; the wrapped
+        // stepper moves backward by the dimension's full span.
+        common_reset(
+            dim,
+            [this](size_type index, size_type offset)
+            {
+                m_it.step_back(index, offset);
+            },
+            false
+        );
+    }
+
+    template <bool is_const, class CT, class... S>
+    inline void xview_stepper<is_const, CT, S...>::reset_back(size_type dim)
+    {
+        // Fast-forward the given dimension to its last position; the wrapped
+        // stepper moves forward by the dimension's full span.
+        common_reset(
+            dim,
+            [this](size_type index, size_type offset)
+            {
+                m_it.step(index, offset);
+            },
+            true
+        );
+    }
+
+    template <bool is_const, class CT, class... S>
+    inline void xview_stepper<is_const, CT, S...>::to_begin()
+    {
+        // Rewind the wrapped stepper and clear the per-dimension index
+        // bookkeeping; the two operations are independent.
+        m_it.to_begin();
+        std::fill(m_index_keeper.begin(), m_index_keeper.end(), 0);
+    }
+
+    // Moves the stepper past-the-end: first send the wrapped stepper to the
+    // underlying expression's end, then let to_end_impl rewind it onto the
+    // view's past-the-end position for the given layout. Order matters:
+    // to_end_impl steps back from the underlying end position.
+    template <bool is_const, class CT, class... S>
+    inline void xview_stepper<is_const, CT, S...>::to_end(layout_type l)
+    {
+        m_it.to_end(l);
+        to_end_impl(l);
+    }
+
+    // Returns true when the slice at the given position in S... is a newaxis
+    // slice, detected by comparing the newaxis counts before and after it.
+    template <bool is_const, class CT, class... S>
+    inline bool xview_stepper<is_const, CT, S...>::is_newaxis_slice(size_type index) const noexcept
+    {
+        // A bit tricky but avoids a lot of template instantiations
+        return newaxis_count_before<S...>(index + 1) != newaxis_count_before<S...>(index);
+    }
+
+    // Positions the stepper at the view's past-the-end location, assuming the
+    // wrapped stepper currently sits at the underlying expression's end.
+    // First every real (non-newaxis) dimension is stepped back onto the last
+    // element selected by its slice; then the past-the-end state is encoded
+    // by bumping the index keeper of the last (row-major) or first
+    // (column-major) real dimension one past its final index.
+    template <bool is_const, class CT, class... S>
+    inline void xview_stepper<is_const, CT, S...>::to_end_impl(layout_type l)
+    {
+        // Last underlying value covered by a slice.
+        auto func = [](const auto& s) noexcept
+        {
+            return xt::value(s, get_size(s) - 1);
+        };
+        // Number of elements selected by a slice.
+        auto size_func = [](const auto& s) noexcept
+        {
+            return get_size(s);
+        };
+
+        for (size_type i = 0; i < sizeof...(S); ++i)
+        {
+            if (!is_newaxis_slice(i))
+            {
+                size_type s = apply<size_type>(i, func, p_view->slices());
+                size_type ix = apply<size_type>(i, size_func, p_view->slices());
+                // NOTE(review): m_index_keeper is indexed by slice position
+                // here but by iteration dimension in common_step_* — verify
+                // the two agree when newaxis slices are present.
+                m_index_keeper[i] = ix - size_type(1);
+                size_type index = i - newaxis_count_before<S...>(i);
+                // Distance from the underlying end position back to the last
+                // element selected by the slice.
+                s = p_view->underlying_size(index) - 1 - s;
+                m_it.step_back(index, s);
+            }
+        }
+        if (l == layout_type::row_major)
+        {
+            // Bump the innermost (last) real dimension past its final index.
+            for (size_type i = sizeof...(S); i > 0; --i)
+            {
+                if (!is_newaxis_slice(i - 1))
+                {
+                    m_index_keeper[i - 1]++;
+                    break;
+                }
+            }
+        }
+        else if (l == layout_type::column_major)
+        {
+            // Bump the innermost (first) real dimension past its final index.
+            for (size_type i = 0; i < sizeof...(S); ++i)
+            {
+                if (!is_newaxis_slice(i))
+                {
+                    m_index_keeper[i]++;
+                    break;
+                }
+            }
+        }
+        else
+        {
+            XTENSOR_THROW(std::runtime_error, "Iteration only allowed in row or column major.");
+        }
+    }
+
+    // Translates a single forward step along iteration dimension `dim` into a
+    // step on the wrapped stepper. Dimensions below m_offset are broadcast
+    // dimensions and are ignored. integral_skip maps the view dimension to
+    // the matching slice position in S... (integral slices consume no view
+    // dimension); step_size converts one logical view step into the number of
+    // underlying steps, advancing the per-dimension index keeper as a side
+    // effect. Dimensions past sizeof...(S) map one-to-one (step size 1).
+    template <bool is_const, class CT, class... S>
+    template <class F>
+    void xview_stepper<is_const, CT, S...>::common_step_forward(size_type dim, F f)
+    {
+        if (dim >= m_offset)
+        {
+            auto func = [&dim, this](const auto& s) noexcept
+            {
+                return step_size(s, this->m_index_keeper[dim]++, 1);
+            };
+            size_type index = integral_skip<S...>(dim);
+            if (!is_newaxis_slice(index))
+            {
+                size_type step_size = index < sizeof...(S) ? apply<size_type>(index, func, p_view->slices())
+                                                           : 1;
+                index -= newaxis_count_before<S...>(index);
+                f(index, step_size);
+            }
+        }
+    }
+
+    // n-element variant of common_step_forward: computes the underlying step
+    // distance for n logical steps along `dim`, advances the per-dimension
+    // index keeper by n, and forwards the result to f. Dimensions past
+    // sizeof...(S) map one-to-one (step size n).
+    template <bool is_const, class CT, class... S>
+    template <class F>
+    void xview_stepper<is_const, CT, S...>::common_step_forward(size_type dim, size_type n, F f)
+    {
+        if (dim >= m_offset)
+        {
+            auto func = [&dim, &n, this](const auto& s) noexcept
+            {
+                auto st_size = step_size(s, this->m_index_keeper[dim], n);
+                this->m_index_keeper[dim] += n;
+                return size_type(st_size);
+            };
+
+            size_type index = integral_skip<S...>(dim);
+            if (!is_newaxis_slice(index))
+            {
+                size_type step_size = index < sizeof...(S) ? apply<size_type>(index, func, p_view->slices())
+                                                           : n;
+                index -= newaxis_count_before<S...>(index);
+                f(index, step_size);
+            }
+        }
+    }
+
+    // Backward counterpart of common_step_forward: decrements the index
+    // keeper first, then computes the underlying distance from the new
+    // position, so forward and backward steps of the same logical distance
+    // cover the same underlying span.
+    template <bool is_const, class CT, class... S>
+    template <class F>
+    void xview_stepper<is_const, CT, S...>::common_step_backward(size_type dim, F f)
+    {
+        if (dim >= m_offset)
+        {
+            auto func = [&dim, this](const auto& s) noexcept
+            {
+                this->m_index_keeper[dim]--;
+                return step_size(s, this->m_index_keeper[dim], 1);
+            };
+            size_type index = integral_skip<S...>(dim);
+            if (!is_newaxis_slice(index))
+            {
+                size_type step_size = index < sizeof...(S) ? apply<size_type>(index, func, p_view->slices())
+                                                           : 1;
+                index -= newaxis_count_before<S...>(index);
+                f(index, step_size);
+            }
+        }
+    }
+
+    // n-element variant of common_step_backward: rewinds the index keeper by
+    // n before computing the underlying distance, mirroring the n-element
+    // forward step.
+    template <bool is_const, class CT, class... S>
+    template <class F>
+    void xview_stepper<is_const, CT, S...>::common_step_backward(size_type dim, size_type n, F f)
+    {
+        if (dim >= m_offset)
+        {
+            auto func = [&dim, &n, this](const auto& s) noexcept
+            {
+                this->m_index_keeper[dim] -= n;
+                return step_size(s, this->m_index_keeper[dim], n);
+            };
+
+            size_type index = integral_skip<S...>(dim);
+            if (!is_newaxis_slice(index))
+            {
+                size_type step_size = index < sizeof...(S) ? apply<size_type>(index, func, p_view->slices())
+                                                           : n;
+                index -= newaxis_count_before<S...>(index);
+                f(index, step_size);
+            }
+        }
+    }
+
+    // Shared implementation of reset/reset_back: resets the index keeper of
+    // `dim` to its first (or, when backwards, last) position and passes the
+    // full underlying span of that dimension to f, which moves the wrapped
+    // stepper accordingly.
+    template <bool is_const, class CT, class... S>
+    template <class F>
+    void xview_stepper<is_const, CT, S...>::common_reset(size_type dim, F f, bool backwards)
+    {
+        // Number of elements selected by a slice.
+        auto size_func = [](const auto& s) noexcept
+        {
+            return get_size(s);
+        };
+        // Underlying distance between a slice's first and last values.
+        auto end_func = [](const auto& s) noexcept
+        {
+            return xt::value(s, get_size(s) - 1) - xt::value(s, 0);
+        };
+
+        size_type index = integral_skip<S...>(dim);
+        if (!is_newaxis_slice(index))
+        {
+            if (dim < m_index_keeper.size())
+            {
+                size_type size = index < sizeof...(S) ? apply<size_type>(index, size_func, p_view->slices())
+                                                      : p_view->shape()[dim];
+                m_index_keeper[dim] = backwards ? size - 1 : 0;
+            }
+
+            // Dimensions past sizeof...(S) map one-to-one to the view shape.
+            size_type reset_n = index < sizeof...(S) ? apply<size_type>(index, end_func, p_view->slices())
+                                                     : p_view->shape()[dim] - 1;
+            index -= newaxis_count_before<S...>(index);
+            f(index, reset_n);
+        }
+    }
+}
+
+#endif

+ 283 - 0
3rd/numpy/include/xtensor/xview_utils.hpp

@@ -0,0 +1,283 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+ * Copyright (c) QuantStack                                                 *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XTENSOR_VIEW_UTILS_HPP
+#define XTENSOR_VIEW_UTILS_HPP
+
+#include <array>
+
+#include "xlayout.hpp"
+#include "xslice.hpp"
+#include "xtensor_forward.hpp"
+
+namespace xt
+{
+
+    /********************************
+     * helper functions declaration *
+     ********************************/
+
+    // number of integral types in the specified sequence of types
+    template <class... S>
+    constexpr std::size_t integral_count();
+
+    // number of integral types in the specified sequence of types before specified index
+    template <class... S>
+    constexpr std::size_t integral_count_before(std::size_t i);
+
+    // index in the specified sequence of types of the ith non-integral type
+    template <class... S>
+    constexpr std::size_t integral_skip(std::size_t i);
+
+    // number of newaxis types in the specified sequence of types
+    template <class... S>
+    constexpr std::size_t newaxis_count();
+
+    // number of newaxis types in the specified sequence of types before specified index
+    template <class... S>
+    constexpr std::size_t newaxis_count_before(std::size_t i);
+
+    // index in the specified sequence of types of the ith non-newaxis type
+    template <class... S>
+    constexpr std::size_t newaxis_skip(std::size_t i);
+
+    // Overload for non-slice (integral) arguments: the value itself is the
+    // underlying index; the iterator is not used. Enabled via disable_xslice
+    // only when S is not an xslice.
+    template <class S, class It>
+    inline disable_xslice<S, std::size_t> get_slice_value(const S& s, It&) noexcept
+    {
+        return static_cast<std::size_t>(s);
+    }
+
+    // Overload for slice arguments: maps the current view index (*it) through
+    // the slice to obtain the corresponding underlying index. The iterator is
+    // read but not advanced.
+    template <class S, class It>
+    inline auto get_slice_value(const xslice<S>& slice, It& it) noexcept
+    {
+        return slice.derived_cast()(typename S::size_type(*it));
+    }
+
+    /***********************
+     * view_temporary_type *
+     ***********************/
+
+    namespace detail
+    {
+        // Generic case: a dynamically-shaped expression yields a dynamically
+        // shaped xarray temporary with the same layout.
+        template <class T, class S, layout_type L, class... SL>
+        struct view_temporary_type_impl
+        {
+            using type = xt::xarray<T, L>;
+        };
+
+        // Fixed-rank case (shape is std::array<I, N>): the temporary is an
+        // xtensor whose rank accounts for the slices applied — each newaxis
+        // slice adds a dimension, each integral index removes one.
+        template <class T, class I, std::size_t N, layout_type L, class... SL>
+        struct view_temporary_type_impl<T, std::array<I, N>, L, SL...>
+        {
+            using type = xt::xtensor<T, N + newaxis_count<SL...>() - integral_count<SL...>(), L>;
+        };
+    }
+
+    // Computes the container type used as a temporary when assigning through
+    // a view of expression E sliced with SL....
+    template <class E, class... SL>
+    struct view_temporary_type
+    {
+        using type = typename detail::view_temporary_type_impl<
+            std::decay_t<typename E::value_type>,
+            typename E::shape_type,
+            E::static_layout,
+            SL...>::type;
+    };
+
+    template <class E, class... SL>
+    using view_temporary_type_t = typename view_temporary_type<E, SL...>::type;
+
+    /************************
+     * count integral types *
+     ************************/
+
+    namespace detail
+    {
+
+        // Recursively counts how many of the first i types in the pack are
+        // integral. The recursion is terminated by the void specialization
+        // below, appended as a sentinel by the public wrappers.
+        template <class T, class... S>
+        struct integral_count_impl
+        {
+            static constexpr std::size_t count(std::size_t i) noexcept
+            {
+                return i
+                           ? (integral_count_impl<S...>::count(i - 1)
+                              + (xtl::is_integral<std::remove_reference_t<T>>::value ? 1 : 0))
+                           : 0;
+            }
+        };
+
+        // Sentinel: end of the type pack, nothing left to count.
+        template <>
+        struct integral_count_impl<void>
+        {
+            static constexpr std::size_t count(std::size_t /*i*/) noexcept
+            {
+                return 0;
+            }
+        };
+    }
+
+    // Number of integral types in S....
+    template <class... S>
+    constexpr std::size_t integral_count()
+    {
+        return detail::integral_count_impl<S..., void>::count(sizeof...(S));
+    }
+
+    // Number of integral types among the first i types of S....
+    template <class... S>
+    constexpr std::size_t integral_count_before(std::size_t i)
+    {
+        return detail::integral_count_impl<S..., void>::count(i);
+    }
+
+    /***********************
+     * count newaxis types *
+     ***********************/
+
+    namespace detail
+    {
+        // Trait detecting xnewaxis slice types.
+        template <class T>
+        struct is_newaxis : std::false_type
+        {
+        };
+
+        template <class T>
+        struct is_newaxis<xnewaxis<T>> : public std::true_type
+        {
+        };
+
+        // Recursively counts how many of the first i types in the pack are
+        // newaxis slices; terminated by the void sentinel specialization.
+        template <class T, class... S>
+        struct newaxis_count_impl
+        {
+            static constexpr std::size_t count(std::size_t i) noexcept
+            {
+                return i
+                           ? (newaxis_count_impl<S...>::count(i - 1)
+                              + (is_newaxis<std::remove_reference_t<T>>::value ? 1 : 0))
+                           : 0;
+            }
+        };
+
+        // Sentinel: end of the type pack, nothing left to count.
+        template <>
+        struct newaxis_count_impl<void>
+        {
+            static constexpr std::size_t count(std::size_t /*i*/) noexcept
+            {
+                return 0;
+            }
+        };
+    }
+
+    // Number of newaxis types in S....
+    template <class... S>
+    constexpr std::size_t newaxis_count()
+    {
+        return detail::newaxis_count_impl<S..., void>::count(sizeof...(S));
+    }
+
+    // Number of newaxis types among the first i types of S....
+    template <class... S>
+    constexpr std::size_t newaxis_count_before(std::size_t i)
+    {
+        return detail::newaxis_count_impl<S..., void>::count(i);
+    }
+
+    /**********************************
+     * index of ith non-integral type *
+     **********************************/
+
+    namespace detail
+    {
+
+        // Computes the position in the pack of the i-th non-integral type,
+        // skipping over integral types. count(0) returns the position of the
+        // first non-integral type; terminated by the void sentinel.
+        template <class T, class... S>
+        struct integral_skip_impl
+        {
+            static constexpr std::size_t count(std::size_t i) noexcept
+            {
+                return i == 0 ? count_impl() : count_impl(i);
+            }
+
+        private:
+
+            // i > 0: consume one pack element; only decrement i when the
+            // element is non-integral (i.e. it "counts").
+            static constexpr std::size_t count_impl(std::size_t i) noexcept
+            {
+                return 1
+                       + (xtl::is_integral<std::remove_reference_t<T>>::value
+                              ? integral_skip_impl<S...>::count(i)
+                              : integral_skip_impl<S...>::count(i - 1));
+            }
+
+            // i == 0: stop at the first non-integral element.
+            static constexpr std::size_t count_impl() noexcept
+            {
+                return xtl::is_integral<std::remove_reference_t<T>>::value
+                           ? 1 + integral_skip_impl<S...>::count(0)
+                           : 0;
+            }
+        };
+
+        // Sentinel: past the end of the pack, remaining indices map directly.
+        template <>
+        struct integral_skip_impl<void>
+        {
+            static constexpr std::size_t count(std::size_t i) noexcept
+            {
+                return i;
+            }
+        };
+    }
+
+    // Index in S... of the i-th non-integral type.
+    template <class... S>
+    constexpr std::size_t integral_skip(std::size_t i)
+    {
+        return detail::integral_skip_impl<S..., void>::count(i);
+    }
+
+    /*********************************
+     * index of ith non-newaxis type *
+     *********************************/
+
+    namespace detail
+    {
+
+        // Computes the position in the pack of the i-th non-newaxis type,
+        // skipping over newaxis slices; mirrors integral_skip_impl above.
+        template <class T, class... S>
+        struct newaxis_skip_impl
+        {
+            static constexpr std::size_t count(std::size_t i) noexcept
+            {
+                return i == 0 ? count_impl() : count_impl(i);
+            }
+
+        private:
+
+            // i > 0: consume one pack element; only decrement i when the
+            // element is not a newaxis slice.
+            static constexpr std::size_t count_impl(std::size_t i) noexcept
+            {
+                return 1
+                       + (is_newaxis<std::remove_reference_t<T>>::value
+                              ? newaxis_skip_impl<S...>::count(i)
+                              : newaxis_skip_impl<S...>::count(i - 1));
+            }
+
+            // i == 0: stop at the first non-newaxis element.
+            static constexpr std::size_t count_impl() noexcept
+            {
+                return is_newaxis<std::remove_reference_t<T>>::value ? 1 + newaxis_skip_impl<S...>::count(0)
+                                                                     : 0;
+            }
+        };
+
+        // Sentinel: past the end of the pack, remaining indices map directly.
+        template <>
+        struct newaxis_skip_impl<void>
+        {
+            static constexpr std::size_t count(std::size_t i) noexcept
+            {
+                return i;
+            }
+        };
+    }
+
+    // Index in S... of the i-th non-newaxis type.
+    template <class... S>
+    constexpr std::size_t newaxis_skip(std::size_t i)
+    {
+        return detail::newaxis_skip_impl<S..., void>::count(i);
+    }
+}
+
+#endif

+ 477 - 0
3rd/numpy/include/xtl/xany.hpp

@@ -0,0 +1,477 @@
+/***************************************************************************
+* Copyright (c) Sylvain Corlay and Johan Mabille and Wolf Vollprecht       *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_ANY_HPP
+#define XTL_ANY_HPP
+
+#include <cstdio>
+#include <exception>
+#include <stdexcept>
+#include <type_traits>
+#include <typeinfo>
+
+#include "xtl/xmeta_utils.hpp"
+
+namespace xtl
+{
+    /**************************************
+     * Implementation of C++17's std::any *
+     **************************************/
+
+    // Copyright (c) 2016 Denilson das Mercês Amorim
+    //
+    // Distributed under the Boost Software License, Version 1.0. (See accompanying
+    // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+    // Exception thrown by any_cast on a type mismatch; models C++17's
+    // std::bad_any_cast.
+    class bad_any_cast : public std::bad_cast
+    {
+    public:
+
+        const char* what() const noexcept override
+        {
+            return "bad any cast";
+        }
+    };
+
+    namespace detail {
+        // Throws xtl::bad_any_cast when the result of an any_cast is null.
+        // When exceptions are disabled (XTL_NO_EXCEPTIONS), reports the error
+        // on stderr and terminates instead, mirroring the throwing behaviour.
+        // Note: std::fprintf requires <cstdio>, included at the top of this
+        // header, so the XTL_NO_EXCEPTIONS branch compiles on its own.
+        inline static void check_any_cast(const void* p) {
+            if (p == nullptr) {
+#if defined(XTL_NO_EXCEPTIONS)
+                std::fprintf(stderr, "bad_any_cast\n");
+                std::terminate();
+#else
+                throw bad_any_cast();
+#endif
+            }
+        }
+    } // namespace detail
+
+    class any final
+    {
+    public:
+
+        /// Constructs an object of type any with an empty state.
+        any()
+            : vtable(nullptr)
+        {
+        }
+
+        /// Constructs an object of type any with an equivalent state as other.
+        any(const any& rhs)
+            : vtable(rhs.vtable)
+        {
+            if (!rhs.empty())
+            {
+                rhs.vtable->copy(rhs.storage, this->storage);
+            }
+        }
+
+        /// Constructs an object of type any with a state equivalent to the original state of other.
+        /// rhs is left in a valid but otherwise unspecified state.
+        any(any&& rhs) noexcept
+            : vtable(rhs.vtable)
+        {
+            if (!rhs.empty())
+            {
+                rhs.vtable->move(rhs.storage, this->storage);
+                rhs.vtable = nullptr;
+            }
+        }
+
+        /// Same effect as this->clear().
+        ~any()
+        {
+            this->clear();
+        }
+
+        /// Constructs an object of type any that contains an object of type T direct-initialized with std::forward<ValueType>(value).
+        ///
+        /// T shall satisfy the CopyConstructible requirements, otherwise the program is ill-formed.
+        /// This is because an `any` may be copy constructed into another `any` at any time, so a copy should always be allowed.
+        template <typename ValueType, typename = typename std::enable_if<!std::is_same<typename std::decay<ValueType>::type, any>::value>::type>
+        any(ValueType&& value)
+        {
+            static_assert(std::is_copy_constructible<typename std::decay<ValueType>::type>::value,
+                          "T shall satisfy the CopyConstructible requirements.");
+            this->construct(std::forward<ValueType>(value));
+        }
+
+        /// Has the same effect as any(rhs).swap(*this). No effects if an exception is thrown.
+        any& operator=(const any& rhs)
+        {
+            any(rhs).swap(*this);
+            return *this;
+        }
+
+        /// Has the same effect as any(std::move(rhs)).swap(*this).
+        ///
+        /// The state of *this is equivalent to the original state of rhs and rhs is left in a valid
+        /// but otherwise unspecified state.
+        any& operator=(any&& rhs) noexcept
+        {
+            any(std::move(rhs)).swap(*this);
+            return *this;
+        }
+
+        /// Has the same effect as any(std::forward<ValueType>(value)).swap(*this). No effect if a exception is thrown.
+        ///
+        /// T shall satisfy the CopyConstructible requirements, otherwise the program is ill-formed.
+        /// This is because an `any` may be copy constructed into another `any` at any time, so a copy should always be allowed.
+        template <typename ValueType, typename = typename std::enable_if<!std::is_same<typename std::decay<ValueType>::type, any>::value>::type>
+        any& operator=(ValueType&& value)
+        {
+            static_assert(std::is_copy_constructible<typename std::decay<ValueType>::type>::value,
+                          "T shall satisfy the CopyConstructible requirements.");
+            any(std::forward<ValueType>(value)).swap(*this);
+            return *this;
+        }
+
+        /// If not empty, destroys the contained object.
+        void clear() noexcept
+        {
+            if (!empty())
+            {
+                this->vtable->destroy(storage);
+                this->vtable = nullptr;
+            }
+        }
+
+        /// C++17 equivalent of clear
+        void reset() noexcept
+        {
+            clear();
+        }
+
+        /// Returns true if *this has no contained object, otherwise false.
+        bool empty() const noexcept
+        {
+            return this->vtable == nullptr;
+        }
+
+        /// C++17 equivalent of !empty()
+        bool has_value() const noexcept
+        {
+            return !empty();
+        }
+
+        /// If *this has a contained object of type T, typeid(T); otherwise typeid(void).
+        const std::type_info& type() const noexcept
+        {
+            return empty() ? typeid(void) : this->vtable->type();
+        }
+
+        /// Exchange the states of *this and rhs.
+        void swap(any& rhs) noexcept
+        {
+            if (this->vtable != rhs.vtable)
+            {
+                any tmp(std::move(rhs));
+
+                // move from *this to rhs.
+                rhs.vtable = this->vtable;
+                if (this->vtable != nullptr)
+                {
+                    this->vtable->move(this->storage, rhs.storage);
+                    //this->vtable = nullptr; -- unneeded, see below
+                }
+
+                // move from tmp (previously rhs) to *this.
+                this->vtable = tmp.vtable;
+                if (tmp.vtable != nullptr)
+                {
+                    tmp.vtable->move(tmp.storage, this->storage);
+                    tmp.vtable = nullptr;
+                }
+            }
+            else  // same types
+            {
+                if (this->vtable != nullptr)
+                    this->vtable->swap(this->storage, rhs.storage);
+            }
+        }
+
+    private:  // Storage and Virtual Method Table
+
+        union storage_union {
+            using stack_storage_t = typename std::aligned_storage<2 * sizeof(void*), std::alignment_of<void*>::value>::type;
+
+            void* dynamic;
+            stack_storage_t stack;  // 2 words for e.g. shared_ptr
+        };
+
+        /// Base VTable specification.
+        struct vtable_type
+        {
+            // Note: The caller is responsible for setting .vtable = nullptr after destructive operations
+            // such as destroy() and/or move().
+
+            /// The type of the object this vtable is for.
+            const std::type_info& (*type)() noexcept;
+
+            /// Destroys the object in the union.
+            /// The state of the union after this call is unspecified, caller must ensure not to use src anymore.
+            void (*destroy)(storage_union&) noexcept;
+
+            /// Copies the **inner** content of the src union into the yet uninitialized dest union.
+            /// As such, both inner objects will have the same state, but on separate memory locations.
+            void (*copy)(const storage_union& src, storage_union& dest);
+
+            /// Moves the storage from src to the yet uninitialized dest union.
+            /// The state of src after this call is unspecified, caller must ensure not to use src anymore.
+            void (*move)(storage_union& src, storage_union& dest) noexcept;
+
+            /// Exchanges the storage between lhs and rhs.
+            void (*swap)(storage_union& lhs, storage_union& rhs) noexcept;
+        };
+
+        /// VTable for dynamically allocated storage.
+        template <typename T>
+        struct vtable_dynamic
+        {
+            static const std::type_info& type() noexcept
+            {
+                return typeid(T);
+            }
+
+            static void destroy(storage_union& storage) noexcept
+            {
+                //assert(reinterpret_cast<T*>(storage.dynamic));
+                delete reinterpret_cast<T*>(storage.dynamic);
+            }
+
+            static void copy(const storage_union& src, storage_union& dest)
+            {
+                dest.dynamic = new T(*reinterpret_cast<const T*>(src.dynamic));
+            }
+
+            static void move(storage_union& src, storage_union& dest) noexcept
+            {
+                dest.dynamic = src.dynamic;
+                src.dynamic = nullptr;
+            }
+
+            static void swap(storage_union& lhs, storage_union& rhs) noexcept
+            {
+                // just exchange the storage pointers.
+                std::swap(lhs.dynamic, rhs.dynamic);
+            }
+        };
+
+        /// VTable for stack allocated storage.
+        template <typename T>
+        struct vtable_stack
+        {
+            static const std::type_info& type() noexcept
+            {
+                return typeid(T);
+            }
+
+            static void destroy(storage_union& storage) noexcept
+            {
+                reinterpret_cast<T*>(&storage.stack)->~T();
+            }
+
+            static void copy(const storage_union& src, storage_union& dest)
+            {
+                new (&dest.stack) T(reinterpret_cast<const T&>(src.stack));
+            }
+
+            static void move(storage_union& src, storage_union& dest) noexcept
+            {
+                // one of the conditions for using vtable_stack is a nothrow move constructor,
+                // so this move constructor will never throw a exception.
+                new (&dest.stack) T(std::move(reinterpret_cast<T&>(src.stack)));
+                destroy(src);
+            }
+
+            static void swap(storage_union& lhs, storage_union& rhs) noexcept
+            {
+                storage_union tmp_storage;
+                move(rhs, tmp_storage);
+                move(lhs, rhs);
+                move(tmp_storage, lhs);
+            }
+        };
+
+        /// Whether the type T must be dynamically allocated or can be stored on the stack.
+        template <typename T>
+        struct requires_allocation : std::integral_constant<bool,
+                                                            !(std::is_nothrow_move_constructible<T>::value  // N4562 §6.3/3 [any.class]
+                                                              && sizeof(T) <= sizeof(storage_union::stack) && std::alignment_of<T>::value <= std::alignment_of<storage_union::stack_storage_t>::value)>
+        {
+        };
+
        /// Returns the pointer to the vtable of the type T.
        ///
        /// The table is a function-local static, so there is exactly one
        /// instance per stored type; its address therefore doubles as a
        /// cheap type tag.
        template <typename T>
        static vtable_type* vtable_for_type()
        {
            using VTableType = typename std::conditional<requires_allocation<T>::value, vtable_dynamic<T>, vtable_stack<T>>::type;
            static vtable_type table = {
                VTableType::type, VTableType::destroy,
                VTableType::copy, VTableType::move,
                VTableType::swap,
            };
            return &table;
        }
+
+    protected:
+        template <typename T>
+        friend const T* any_cast(const any* operand) noexcept;
+        template <typename T>
+        friend T* any_cast(any* operand) noexcept;
+
        /// Same effect as is_same(this->type(), t);
        /// i.e. whether the currently stored value has type_info t.
        bool is_typed(const std::type_info& t) const
        {
            return is_same(this->type(), t);
        }
+
        /// Checks if two type infos are the same.
        ///
        /// If ANY_IMPL_FAST_TYPE_INFO_COMPARE is defined, checks only the address of the
        /// type infos, otherwise does an actual comparison. Checking addresses is
        /// only a valid approach when there's no interaction with outside sources
        /// (other shared libraries and such).
        static bool is_same(const std::type_info& a, const std::type_info& b)
        {
#ifdef ANY_IMPL_FAST_TYPE_INFO_COMPARE
            return &a == &b;
#else
            return a == b;
#endif
        }
+
        /// Casts (with no type_info checks) the storage pointer as const T*.
        /// Uses the decayed type to decide whether the value lives on the
        /// heap (storage.dynamic) or in the in-place buffer (storage.stack).
        template <typename T>
        const T* cast() const noexcept
        {
            return requires_allocation<typename std::decay<T>::type>::value ? reinterpret_cast<const T*>(storage.dynamic) : reinterpret_cast<const T*>(&storage.stack);
        }
+
        /// Casts (with no type_info checks) the storage pointer as T*.
        /// Mirrors the const overload: heap pointer when the decayed type
        /// requires allocation, in-place buffer address otherwise.
        template <typename T>
        T* cast() noexcept
        {
            return requires_allocation<typename std::decay<T>::type>::value ? reinterpret_cast<T*>(storage.dynamic) : reinterpret_cast<T*>(&storage.stack);
        }
+
+    private:
+
+        storage_union storage;  // on offset(0) so no padding for align
+        vtable_type* vtable;
+
        /// Chooses between stack and dynamic allocation for the type decay_t<ValueType>,
        /// assigns the correct vtable, and constructs the object on our storage.
        template <typename ValueType>
        void construct(ValueType&& value)
        {
            using T = typename std::decay<ValueType>::type;

            // The vtable must agree with the storage strategy chosen below.
            this->vtable = vtable_for_type<T>();

            return xtl::mpl::static_if<requires_allocation<T>::value>([&](auto self)
            {
                // Heap path: T is too big/over-aligned or may throw on move.
                self(*this).storage.dynamic = new T(std::forward<ValueType>(value));
            }, /*else*/ [&](auto self)
            {
                // In-place path: construct directly inside the stack buffer.
                new (&self(*this).storage.stack) T(std::forward<ValueType>(value));
            });
        }
+    };
+
+
+    namespace detail
+    {
+        template <typename ValueType>
+        inline ValueType any_cast_move_if_true(typename std::remove_reference<ValueType>::type* p, std::true_type)
+        {
+            return std::move(*p);
+        }
+
+        template <typename ValueType>
+        inline ValueType any_cast_move_if_true(typename std::remove_reference<ValueType>::type* p, std::false_type)
+        {
+            return *p;
+        }
+    }
+
+    /// Performs *any_cast<add_const_t<remove_reference_t<ValueType>>>(&operand), or throws bad_any_cast on failure.
+    template <typename ValueType>
+    inline ValueType any_cast(const any& operand)
+    {
+        auto p = any_cast<typename std::add_const<typename std::remove_reference<ValueType>::type>::type>(&operand);
+        detail::check_any_cast(p);
+        return *p;
+    }
+
+    /// Performs *any_cast<remove_reference_t<ValueType>>(&operand), or throws bad_any_cast on failure.
+    template <typename ValueType>
+    inline ValueType any_cast(any& operand)
+    {
+        auto p = any_cast<typename std::remove_reference<ValueType>::type>(&operand);
+        detail::check_any_cast(p);
+        return *p;
+    }
+
    ///
    /// If ANY_IMPL_ANY_CAST_MOVEABLE is not defined, does as N4562 specifies:
    ///     Performs *any_cast<remove_reference_t<ValueType>>(&operand), or throws bad_any_cast on failure.
    ///
    /// If ANY_IMPL_ANY_CAST_MOVEABLE is defined, does as LWG Defect 2509 specifies:
    ///     If ValueType is MoveConstructible and isn't a lvalue reference, performs
    ///     std::move(*any_cast<remove_reference_t<ValueType>>(&operand)), otherwise
    ///     *any_cast<remove_reference_t<ValueType>>(&operand). Throws bad_any_cast on failure.
    ///
    template <typename ValueType>
    inline ValueType any_cast(any&& operand)
    {
#ifdef ANY_IMPL_ANY_CAST_MOVEABLE
        // https://cplusplus.github.io/LWG/lwg-active.html#2509
        using can_move = std::integral_constant<bool,
                                                std::is_move_constructible<ValueType>::value && !std::is_lvalue_reference<ValueType>::value>;
#else
        // Strict N4562 behavior: never move out of the operand.
        using can_move = std::false_type;
#endif

        auto p = any_cast<typename std::remove_reference<ValueType>::type>(&operand);
        detail::check_any_cast(p);
        return detail::any_cast_move_if_true<ValueType>(p, can_move());
    }
+
+    /// If operand != nullptr && operand->type() == typeid(ValueType), a pointer to the object
+    /// contained by operand, otherwise nullptr.
+    template <typename T>
+    inline const T* any_cast(const any* operand) noexcept
+    {
+        if (operand == nullptr || !operand->is_typed(typeid(T)))
+            return nullptr;
+        else
+            return operand->cast<T>();
+    }
+
+    /// If operand != nullptr && operand->type() == typeid(ValueType), a pointer to the object
+    /// contained by operand, otherwise nullptr.
+    template <typename T>
+    inline T* any_cast(any* operand) noexcept
+    {
+        if (operand == nullptr || !operand->is_typed(typeid(T)))
+            return nullptr;
+        else
+            return operand->cast<T>();
+    }
+}
+
namespace std
{
    /// Overload of std::swap for xtl::any; delegates to the member swap,
    /// which exchanges storage through the per-type vtables.
    inline void swap(xtl::any& lhs, xtl::any& rhs) noexcept
    {
        lhs.swap(rhs);
    }
}
+
+#endif

+ 77 - 0
3rd/numpy/include/xtl/xbase64.hpp

@@ -0,0 +1,77 @@
+/***************************************************************************
+* Copyright (c) Sylvain Corlay and Johan Mabille and Wolf Vollprecht       *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_BASE64_HPP
+#define XTL_BASE64_HPP
+
+#include <array>
+#include <cstddef>
+#include <string>
+
+#include "xsequence.hpp"
+
+namespace xtl
+{
+    inline std::string base64decode(const std::string& input)
+    {
+        std::array<int, 256> T;
+        T.fill(-1);
+        for (std::size_t i = 0; i < 64; ++i)
+        {
+            T[std::size_t("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[i])] = int(i);
+        }
+
+        std::string output;
+        int val = 0;
+        int valb = -8;
+        for (char c : input)
+        {
+            if (T[std::size_t(c)] == -1)
+            {
+                break;
+            }
+            val = (val << 6) + T[std::size_t(c)];
+            valb += 6;
+            if (valb >= 0)
+            {
+                output.push_back(char((val >> valb) & 0xFF));
+                valb -= 8;
+            }
+        }
+        return output;
+    }
+
+    inline std::string base64encode(const std::string& input)
+    {
+        std::string output;
+        int val = 0;
+        int valb = -6;
+        for (char sc : input)
+        {
+            unsigned char c = static_cast<unsigned char>(sc);
+            val = (val << 8) + c;
+            valb += 8;
+            while (valb >= 0)
+            {
+                output.push_back("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(val >> valb) & 0x3F]);
+                valb -= 6;
+            }
+        }
+        if (valb > -6)
+        {
+            output.push_back("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[((val << 8) >> (valb + 8)) & 0x3F]);
+        }
+        while (output.size() % 4)
+        {
+            output.push_back('=');
+        }
+        return output;
+    }
+}
+#endif

+ 2436 - 0
3rd/numpy/include/xtl/xbasic_fixed_string.hpp

@@ -0,0 +1,2436 @@
+/***************************************************************************
+* Copyright (c) Sylvain Corlay and Johan Mabille and Wolf Vollprecht       *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_BASIC_FIXED_STRING_HPP
+#define XTL_BASIC_FIXED_STRING_HPP
+
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <exception>
#include <functional>
#include <iterator>
#include <sstream>
#include <stdexcept>
#include <string>
#include <type_traits>

#ifdef __CLING__
#include <nlohmann/json.hpp>
#endif

#include "xhash.hpp"
#include "xtl_config.hpp"
+
+namespace xtl
+{
+
    namespace string_policy
    {
        // Forward declarations of the error policies accepted by
        // xbasic_fixed_string's EP template parameter (defined elsewhere
        // in this header).
        // NOTE(review): the names suggest silent_error truncates silently
        // while throwing_error raises — confirm against the definitions.
        template <std::size_t>
        struct silent_error;

        template <std::size_t>
        struct throwing_error;
    }
+
    /***********************
     * xbasic_fixed_string *
     ***********************/

    /// Bit flags combined with | in the ST template parameter of
    /// xbasic_fixed_string to select a storage implementation
    /// (see the detail::select_storage specializations below).
    enum storage_options
    {
        buffer = 1 << 0,
        pointer = 1 << 1,
        store_size = 1 << 2,
        is_const = 1 << 3
    };
+
    // Forward declaration; default is an embedded buffer of 55 characters
    // with an explicitly stored size and a silent (non-throwing) error policy.
    template <class CT, std::size_t N = 55, int ST = buffer | store_size, template <std::size_t> class EP = string_policy::silent_error, class TR = std::char_traits<CT>>
    class xbasic_fixed_string;

    /// Stream insertion operator declaration.
    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    std::basic_ostream<CT, TR>& operator<<(std::basic_ostream<CT, TR>& os,
                                           const xbasic_fixed_string<CT, N, ST, EP, TR>& str);

    /// Stream extraction operator declaration.
    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    std::basic_istream<CT, TR>& operator>>(std::basic_istream<CT, TR>& is,
                                           xbasic_fixed_string<CT, N, ST, EP, TR>& str);

    /// Non-owning, read-only view flavor of xbasic_fixed_string.
    template <class CT>
    using xbasic_string_view = xbasic_fixed_string<const CT, 0, pointer | store_size | is_const>;
+
    namespace detail
    {
        // Maps a storage_options bitmask to a storage implementation;
        // only the specializations below are defined.
        template <int selector>
        struct select_storage;

        // Storage that packs the free capacity into the last buffer element;
        // only the array specialization below is defined.
        template <typename T>
        struct fixed_small_string_storage_impl;
+
+        template <class T, std::size_t N>
+        struct fixed_small_string_storage_impl<T[N]>
+        {
+            static_assert(N <= (1u << (8 * sizeof(T))), "small string");
+
+            fixed_small_string_storage_impl()
+            {
+                set_size(0);
+            }
+
+            fixed_small_string_storage_impl(T ptr[N], std::size_t size)
+                : m_buffer(ptr)
+            {
+                m_buffer[N - 1] = N - size;
+            }
+
+            T* buffer()
+            {
+                return m_buffer;
+            }
+
+            const T* buffer() const
+            {
+                return m_buffer;
+            }
+
+            std::size_t size() const
+            {
+                // Don't use std::make_unsinged_t here, this should remain C++11 compatible
+                using unsigned_type = typename std::make_unsigned<T>::type;
+                return N - reinterpret_cast<unsigned_type const*>(m_buffer)[N - 1];
+            }
+
+            void set_size(std::size_t sz)
+            {
+                assert(sz < N && "setting a small size");
+                // Don't use std::make_unsinged_t here, this should remain C++11 compatible
+                using unsigned_type = typename std::make_unsigned<T>::type;
+                reinterpret_cast<unsigned_type*>(m_buffer)[N - 1] = static_cast<unsigned_type>(N - sz);
+                m_buffer[sz] = '\0';
+            }
+
+            void adjust_size(std::ptrdiff_t val)
+            {
+                assert(size() + val >= 0 && "adjusting to positive size");
+                set_size(static_cast<std::size_t>(static_cast<std::ptrdiff_t>(size()) + val));
+            }
+
+            T m_buffer[N];
+        };
+
+        template <class T>
+        struct fixed_string_storage_impl
+        {
+            fixed_string_storage_impl() = default;
+
+            fixed_string_storage_impl(T ptr, std::size_t size)
+                : m_buffer(ptr), m_size(size)
+            {
+            }
+
+            T& buffer()
+            {
+                return m_buffer;
+            }
+
+            const T& buffer() const
+            {
+                return m_buffer;
+            }
+
+            std::size_t size() const
+            {
+                return m_size;
+            }
+
+            void set_size(std::size_t sz)
+            {
+                m_size = sz;
+                m_buffer[sz] = '\0';
+            }
+
+            void adjust_size(std::ptrdiff_t val)
+            {
+                m_size += std::size_t(val);
+                m_buffer[m_size] = '\0';
+            }
+
+            T m_buffer;
+            std::size_t m_size;
+        };
+
+        template <class T>
+        struct fixed_string_external_storage_impl
+        {
+            fixed_string_external_storage_impl() = default;
+
+            fixed_string_external_storage_impl(T ptr, std::ptrdiff_t/*size*/)
+            {
+                m_buffer = ptr;
+            }
+
+            T& buffer()
+            {
+                return m_buffer;
+            }
+
+            const T& buffer() const
+            {
+                return m_buffer;
+            }
+
+            void set_size(std::size_t sz)
+            {
+                m_buffer[sz] = '\0';
+            }
+
+            void adjust_size(std::ptrdiff_t val)
+            {
+                m_buffer[size() + val] = '\0';
+            }
+
+            std::size_t size() const
+            {
+                return std::strlen(m_buffer);
+            }
+
+            T m_buffer;
+        };
+
+        template <class T, bool Small>
+        struct select_fixed_storage {
+          using type = fixed_string_storage_impl<T>;
+        };
+
+        template <class T>
+        struct select_fixed_storage<T, true> {
+          using type = fixed_small_string_storage_impl<T>;
+        };
+
        /// Storage selection for `buffer | store_size`: an embedded array of
        /// N + 1 characters, using the packed small-string layout whenever
        /// the free-count fits into one trailing element.
        template <>
        struct select_storage<buffer | store_size>
        {
            template <class T, std::size_t N>
            using type = typename select_fixed_storage<T[N + 1], N < (1u << (8 * sizeof(T)))>::type;
        };

        /// Storage selection for `buffer` alone: an embedded array whose
        /// size is recomputed from the null terminator.
        template <>
        struct select_storage<buffer>
        {
            template <class T, std::size_t N>
            using type = fixed_string_external_storage_impl<T[N + 1]>;
        };
    }
+
    /// Fixed-capacity string class template.
    ///
    /// Mirrors a large subset of the std::basic_string interface while
    /// storing characters in a storage policy chosen from the ST bitmask
    /// (see storage_options and detail::select_storage). Declarations only;
    /// definitions follow elsewhere in this header.
    ///
    /// \tparam CT character type
    /// \tparam N  maximum number of characters
    /// \tparam ST storage_options bitmask selecting the storage layout
    /// \tparam EP error policy template (see string_policy)
    /// \tparam TR character traits
    template <class CT,
              std::size_t N,
              int ST,
              template <std::size_t> class EP,
              class TR>
    class xbasic_fixed_string
    {
    public:

        using traits_type = TR;
        using value_type = CT;
        using size_type = std::size_t;
        using difference_type = std::ptrdiff_t;

        using storage_type = typename detail::select_storage<ST>::template type<CT, N>;

        using reference = value_type&;
        using const_reference = const value_type&;
        using pointer = value_type*;
        using const_pointer = const value_type*;
        using iterator = pointer;
        using const_iterator = const_pointer;
        using reverse_iterator = std::reverse_iterator<iterator>;
        using const_reverse_iterator = std::reverse_iterator<const_iterator>;


        static const size_type npos;

        using self_type = xbasic_fixed_string;
        using initializer_type = std::initializer_list<value_type>;
        using string_type = std::basic_string<value_type, traits_type>;

        using error_policy = EP<N>;

        // Constructors
        xbasic_fixed_string();

        explicit xbasic_fixed_string(size_type count, value_type ch);
        explicit xbasic_fixed_string(const self_type& other,
                            size_type pos,
                            size_type count = npos);
        explicit xbasic_fixed_string(const string_type& other);
        explicit xbasic_fixed_string(const string_type& other,
                                     size_type pos,
                                     size_type count = npos);
        xbasic_fixed_string(const_pointer s, size_type count);
        xbasic_fixed_string(const_pointer s);
        xbasic_fixed_string(initializer_type ilist);

        template <class InputIt>
        xbasic_fixed_string(InputIt first, InputIt last);

        // Implicit conversion to std::basic_string
        operator string_type() const;

        ~xbasic_fixed_string() = default;

        xbasic_fixed_string(const self_type&) = default;
        xbasic_fixed_string(self_type&&) = default;

        // Assignment
        self_type& operator=(const self_type&) = default;
        self_type& operator=(self_type&&) = default;
        self_type& operator=(const_pointer s);
        self_type& operator=(value_type ch);
        self_type& operator=(initializer_type ilist);
        self_type& operator=(const string_type& str);

        self_type& assign(size_type count, value_type ch);
        self_type& assign(const self_type& other,
                          size_type pos,
                          size_type count = npos);
        self_type& assign(const_pointer s, size_type count);
        self_type& assign(const_pointer s);
        self_type& assign(initializer_type ilist);
        template <class InputIt>
        self_type& assign(InputIt first, InputIt last);
        self_type& assign(const self_type& rhs);
        self_type& assign(self_type&& rhs);
        self_type& assign(const string_type& str);
        self_type& assign(const string_type& other,
                          size_type pos,
                          size_type count = npos);

        // Element access
        reference at(size_type pos);
        const_reference at(size_type pos) const;

        reference operator[](size_type pos);
        const_reference operator[](size_type pos) const;

        reference front();
        const_reference front() const;

        reference back();
        const_reference back() const;

        pointer data() noexcept;
        const_pointer data() const noexcept;

        const_pointer c_str() const noexcept;

        // Iterators
        iterator begin() noexcept;
        iterator end() noexcept;
        const_iterator begin() const noexcept;
        const_iterator end() const noexcept;
        const_iterator cbegin() const noexcept;
        const_iterator cend() const noexcept;

        reverse_iterator rbegin() noexcept;
        reverse_iterator rend() noexcept;
        const_reverse_iterator rbegin() const noexcept;
        const_reverse_iterator rend() const noexcept;
        const_reverse_iterator crbegin() const noexcept;
        const_reverse_iterator crend() const noexcept;

        // Capacity
        bool empty() const noexcept;
        size_type size() const noexcept;
        size_type length() const noexcept;
        size_type max_size() const noexcept;

        // Operations
        void clear() noexcept;
        void push_back(value_type ch);
        void pop_back();
        self_type substr(size_type pos = 0, size_type count = npos) const;
        size_type copy(pointer dest, size_type count, size_type pos = 0) const;
        void resize(size_type count);
        void resize(size_type count, value_type ch);
        void swap(self_type& rhs) noexcept;

        self_type& insert(size_type index, size_type count, value_type ch);
        self_type& insert(size_type index, const_pointer s);
        self_type& insert(size_type index, const_pointer s, size_type count);
        self_type& insert(size_type index, const self_type& str);
        self_type& insert(size_type index, const self_type& str,
                          size_type index_str, size_type count = npos);
        self_type& insert(size_type index, const string_type& str);
        self_type& insert(size_type index, const string_type& str,
                          size_type index_str, size_type count = npos);
        iterator insert(const_iterator pos, value_type ch);
        iterator insert(const_iterator pos, size_type count, value_type ch);
        iterator insert(const_iterator pos, initializer_type ilist);
        template <class InputIt>
        iterator insert(const_iterator pos, InputIt first, InputIt last);

        self_type& erase(size_type index = 0, size_type count = npos);
        iterator erase(const_iterator position);
        iterator erase(const_iterator first, const_iterator last);

        self_type& append(size_type count, value_type ch);
        self_type& append(const self_type& str);
        self_type& append(const self_type& str,
                          size_type pos, size_type count = npos);
        self_type& append(const string_type& str);
        self_type& append(const string_type& str,
                          size_type pos, size_type count = npos);
        self_type& append(const_pointer s, size_type count);
        self_type& append(const_pointer s);
        self_type& append(initializer_type ilist);
        template <class InputIt>
        self_type& append(InputIt first, InputIt last);

        self_type& operator+=(const self_type& str);
        self_type& operator+=(const string_type& str);
        self_type& operator+=(value_type ch);
        self_type& operator+=(const_pointer s);
        self_type& operator+=(initializer_type ilist);

        // Comparison (same contract as std::basic_string::compare)
        int compare(const self_type& str) const noexcept;
        int compare(size_type pos1, size_type count1, const self_type& str) const;
        int compare(size_type pos1, size_type count1, const self_type& str,
                    size_type pos2, size_type count2 = npos) const;
        int compare(const string_type& str) const noexcept;
        int compare(size_type pos1, size_type count1, const string_type& str) const;
        int compare(size_type pos1, size_type count1, const string_type& str,
                    size_type pos2, size_type count2 = npos) const;
        int compare(const_pointer s) const noexcept;
        int compare(size_type pos1, size_type count1, const_pointer s) const;
        int compare(size_type pos1, size_type count1, const_pointer s, size_type count2) const;

        self_type& replace(size_type pos, size_type count, const self_type& str);
        self_type& replace(const_iterator first, const_iterator last, const self_type& str);
        self_type& replace(size_type pos1, size_type count1, const self_type& str,
                           size_type pos2, size_type count2 = npos);
        self_type& replace(size_type pos, size_type count, const string_type& str);
        self_type& replace(const_iterator first, const_iterator last, const string_type& str);
        self_type& replace(size_type pos1, size_type count1, const string_type& str,
                           size_type pos2, size_type count2 = npos);
        self_type& replace(size_type pos, size_type count, const_pointer cstr, size_type count2);
        self_type& replace(const_iterator first, const_iterator last, const_pointer cstr, size_type count2);
        self_type& replace(size_type pos, size_type count, const_pointer cstr);
        self_type& replace(const_iterator first, const_iterator last, const_pointer cstr);
        self_type& replace(size_type pos, size_type count, size_type count2, value_type ch);
        self_type& replace(const_iterator first, const_iterator last, size_type count2, value_type ch);
        self_type& replace(const_iterator first, const_iterator last, initializer_type ilist);
        template <class InputIt>
        self_type& replace(const_iterator first, const_iterator last, InputIt first2, InputIt last2);

        // Search
        size_type find(const self_type& str, size_type pos = 0) const noexcept;
        size_type find(const string_type& str, size_type pos = 0) const noexcept;
        size_type find(const_pointer s, size_type pos, size_type count) const;
        size_type find(const_pointer s, size_type pos = 0) const;
        size_type find(value_type ch, size_type pos = 0) const;

        size_type rfind(const self_type& str, size_type pos = npos) const noexcept;
        size_type rfind(const string_type& str, size_type pos = npos) const noexcept;
        size_type rfind(const_pointer s, size_type pos, size_type count) const;
        size_type rfind(const_pointer s, size_type pos = npos) const;
        size_type rfind(value_type ch, size_type pos = npos) const;

        size_type find_first_of(const self_type& str, size_type pos = 0) const noexcept;
        size_type find_first_of(const string_type& str, size_type pos = 0) const noexcept;
        size_type find_first_of(const_pointer s, size_type pos, size_type count) const;
        size_type find_first_of(const_pointer s, size_type pos = 0) const;
        size_type find_first_of(value_type ch, size_type pos = 0) const;

        size_type find_first_not_of(const self_type& str, size_type pos = 0) const noexcept;
        size_type find_first_not_of(const string_type& str, size_type pos = 0) const noexcept;
        size_type find_first_not_of(const_pointer s, size_type pos, size_type count) const;
        size_type find_first_not_of(const_pointer s, size_type pos = 0) const;
        size_type find_first_not_of(value_type ch, size_type pos = 0) const;

        size_type find_last_of(const self_type& str, size_type pos = 0) const noexcept;
        size_type find_last_of(const string_type& str, size_type pos = 0) const noexcept;
        size_type find_last_of(const_pointer s, size_type pos, size_type count) const;
        size_type find_last_of(const_pointer s, size_type pos = 0) const;
        size_type find_last_of(value_type ch, size_type pos = 0) const;

        size_type find_last_not_of(const self_type& str, size_type pos = npos) const noexcept;
        size_type find_last_not_of(const string_type& str, size_type pos = npos) const noexcept;
        size_type find_last_not_of(const_pointer s, size_type pos, size_type count) const;
        size_type find_last_not_of(const_pointer s, size_type pos = npos) const;
        size_type find_last_not_of(value_type ch, size_type pos = npos) const;

    private:

        // Internal helpers (defined elsewhere in this header).
        int compare_impl(const_pointer s1, size_type count1, const_pointer s2, size_type count2) const noexcept;
        void update_null_termination() noexcept;
        void check_index(size_type pos, size_type size, const char* what) const;
        void check_index_strict(size_type pos, size_type size, const char* what) const;

        storage_type m_storage;
    };
+
    /// Out-of-class definition of the npos sentinel; mirrors std::basic_string::npos.
    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    const typename xbasic_fixed_string<CT, N, ST, EP, TR>::size_type xbasic_fixed_string<CT, N, ST, EP, TR>::npos
        = std::basic_string<value_type, traits_type>::npos;
+
    /// Fixed string of char.
    template <std::size_t N>
    using xfixed_string = xbasic_fixed_string<char, N>;

    /// Fixed string of wchar_t.
    template <std::size_t N>
    using xwfixed_string = xbasic_fixed_string<wchar_t, N>;

    /// Fixed string of char16_t.
    template <std::size_t N>
    using xu16fixed_string = xbasic_fixed_string<char16_t, N>;

    /// Fixed string of char32_t.
    template <std::size_t N>
    using xu32fixed_string = xbasic_fixed_string<char32_t, N>;
+
    /**************************
     * Concatenation operator *
     **************************/

    // Declarations of operator+ for every lvalue/rvalue combination of
    // fixed strings, raw character pointers and single characters.

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
              const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
              const CT* rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
              CT rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(const CT* lhs,
              const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(CT lhs,
              const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>&& lhs,
              const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
              const xbasic_fixed_string<CT, N, ST, EP, TR>&& rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>&& lhs,
              const xbasic_fixed_string<CT, N, ST, EP, TR>&& rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>&& lhs,
              const CT* rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>&& lhs,
              CT rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(const CT* lhs,
              const xbasic_fixed_string<CT, N, ST, EP, TR>&& rhs);

    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
    xbasic_fixed_string<CT, N, ST, EP, TR>
    operator+(CT lhs,
              const xbasic_fixed_string<CT, N, ST, EP, TR>&& rhs);
+
+    /************************
+     * Comparison operators *
+     ************************/
+
+    // Each relational operator is declared for every mixed combination of
+    // xbasic_fixed_string, null-terminated C string, and std::basic_string.
+    // All are noexcept; definitions are outside this chunk.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator==(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator==(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const CT* rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator==(const CT* lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator==(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const std::basic_string<CT, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator==(const std::basic_string<CT, TR>& lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator!=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator!=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const CT* rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator!=(const CT* lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator!=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const std::basic_string<CT, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator!=(const std::basic_string<CT, TR>& lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator<(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                   const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator<(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                   const CT* rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator<(const CT* lhs,
+                   const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator<(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                   const std::basic_string<CT, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator<(const std::basic_string<CT, TR>& lhs,
+                   const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator<=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator<=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const CT* rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator<=(const CT* lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator<=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const std::basic_string<CT, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator<=(const std::basic_string<CT, TR>& lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator>(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                   const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator>(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                   const CT* rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator>(const CT* lhs,
+                   const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator>(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                   const std::basic_string<CT, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator>(const std::basic_string<CT, TR>& lhs,
+                   const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator>=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator>=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const CT* rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator>=(const CT* lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator>=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                    const std::basic_string<CT, TR>& rhs) noexcept;
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    bool operator>=(const std::basic_string<CT, TR>& lhs,
+                    const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept;
+
+    // Free swap so ADL `swap(a, b)` works on fixed strings.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    void swap(xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+              xbasic_fixed_string<CT, N, ST, EP, TR>& rhs);
+
+    /******************************
+     * Input / output declaration *
+     ******************************/
+
+    // getline overloads mirroring std::getline: lvalue/rvalue stream, with
+    // an explicit delimiter or defaulting to the newline-style overloads
+    // below. Definitions are outside this chunk.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    std::basic_istream<CT, TR>& getline(std::basic_istream<CT, TR>& input,
+                                        xbasic_fixed_string<CT, N, ST, EP, TR>& str,
+                                        CT delim);
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    std::basic_istream<CT, TR>& getline(std::basic_istream<CT, TR>&& input,
+                                        xbasic_fixed_string<CT, N, ST, EP, TR>& str,
+                                        CT delim);
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    std::basic_istream<CT, TR>& getline(std::basic_istream<CT, TR>& input,
+                                        xbasic_fixed_string<CT, N, ST, EP, TR>& str);
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    std::basic_istream<CT, TR>& getline(std::basic_istream<CT, TR>&& input,
+                                        xbasic_fixed_string<CT, N, ST, EP, TR>& str);
+
+}  // namespace xtl
+
+namespace std
+{
+    // std::hash specialization for xbasic_fixed_string: hashes only the
+    // first size() bytes of the buffer via xtl::hash_bytes with a fixed
+    // seed, so the hash depends on content, not on the capacity N or any
+    // unused tail of the storage.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    struct hash<::xtl::xbasic_fixed_string<CT, N, ST, EP, TR>>
+    {
+        using argument_type = ::xtl::xbasic_fixed_string<CT, N, ST, EP, TR>;
+        using result_type = std::size_t;
+        inline result_type operator()(const argument_type& arg) const
+        {
+            return ::xtl::hash_bytes(arg.data(), arg.size(), static_cast<std::size_t>(0xc70f6907UL));
+        }
+    };
+}  // namespace std
+
+namespace xtl
+{
+    /********************************
+     * xbasic_fixed_string policies *
+     ********************************/
+
+    // Error policies selectable through the EP template parameter: they
+    // validate requested sizes against the fixed capacity N.
+    namespace string_policy
+    {
+        // No-op policy: sizes are returned unchecked, so exceeding the
+        // capacity N is the caller's responsibility (silent buffer overrun
+        // otherwise).
+        template <std::size_t N>
+        struct silent_error
+        {
+            inline static std::size_t check_size(std::size_t size)
+            {
+                return size;
+            }
+            inline static std::size_t check_add(std::size_t size1, std::size_t size2)
+            {
+                return size1 + size2;
+            }
+        };
+
+        // Checked policy: rejects any size greater than N with a
+        // std::length_error (or, when exceptions are disabled via
+        // XTL_NO_EXCEPTIONS, prints the message and terminates).
+        template <std::size_t N>
+        struct throwing_error
+        {
+            inline static std::size_t check_size(std::size_t size)
+            {
+                if (size > N)
+                {
+                    std::ostringstream oss;
+                    oss << "Invalid size (" << size << ") for xbasic_fixed_string - maximal size: " << N;
+#if defined(XTL_NO_EXCEPTIONS)
+                    std::fprintf(stderr, "%s\n", oss.str().c_str());
+                    std::terminate();
+#else
+                    throw std::length_error(oss.str());
+#endif
+                }
+                return size;
+            }
+
+            // check_add validates the sum of two sizes (used for append /
+            // insert growth paths).
+            inline static std::size_t check_add(std::size_t size1, std::size_t size2)
+            {
+                return check_size(size1 + size2);
+            }
+        };
+    }  // namespace string_policy
+
+    /**************************************
+     * xbasic_fixed_string implementation *
+     **************************************/
+
+    /****************
+     * Constructors *
+     ****************/
+
+    // All constructors default-initialize the storage and then delegate to
+    // the matching assign() overload, which applies the error policy.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>::xbasic_fixed_string()
+        : m_storage()
+    {
+    }
+
+    // count copies of character ch.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>::xbasic_fixed_string(size_type count, value_type ch)
+        : m_storage()
+    {
+        assign(count, ch);
+    }
+
+    // Substring of another fixed string; assign() bounds-checks pos.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>::xbasic_fixed_string(const self_type& other,
+                                                                   size_type pos,
+                                                                   size_type count)
+        : m_storage()
+    {
+        assign(other, pos, count);
+    }
+
+    // From a std::basic_string.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>::xbasic_fixed_string(const string_type& other)
+        : m_storage()
+    {
+        assign(other);
+    }
+
+    // Substring of a std::basic_string.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>::xbasic_fixed_string(const string_type& other,
+                                                                   size_type pos,
+                                                                   size_type count)
+        : m_storage()
+    {
+        assign(other, pos, count);
+    }
+
+    // First count characters of buffer s (need not be null-terminated).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>::xbasic_fixed_string(const_pointer s, size_type count)
+        : m_storage()
+    {
+        assign(s, count);
+    }
+
+    // Null-terminated C string.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>::xbasic_fixed_string(const_pointer s)
+        : m_storage()
+    {
+        assign(s);
+    }
+
+    // Brace-enclosed character list.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>::xbasic_fixed_string(initializer_type ilist)
+        : m_storage()
+    {
+        assign(ilist);
+    }
+
+    // Iterator range [first, last).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    template <class InputIt>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>::xbasic_fixed_string(InputIt first, InputIt last)
+        : m_storage()
+    {
+        assign(first, last);
+    }
+
+    // Implicit conversion to std::basic_string, copying from the
+    // null-terminated buffer.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>::operator string_type() const
+    {
+        return string_type(data());
+    }
+
+    /**************
+     * Assignment *
+     **************/
+
+    // Assignment operators all forward to assign(); assign() overloads set
+    // the stored size through the error policy, then copy characters in.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator=(const_pointer s) -> self_type&
+    {
+        return assign(s);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator=(value_type ch) -> self_type&
+    {
+        return assign(size_type(1), ch);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator=(initializer_type ilist) -> self_type&
+    {
+        return assign(ilist);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator=(const string_type& str) -> self_type&
+    {
+        return assign(str);
+    }
+
+    // Fill with count copies of ch; count is validated by the policy.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::assign(size_type count, value_type ch) -> self_type&
+    {
+        m_storage.set_size(error_policy::check_size(count));
+        traits_type::assign(data(), count, ch);
+        return *this;
+    }
+
+    // Substring assign from another fixed string; pos is bounds-checked,
+    // count is clamped to the available tail.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::assign(const self_type& other,
+                                                           size_type pos,
+                                                           size_type count) -> self_type&
+    {
+        check_index_strict(pos, other.size(), "xbasic_fixed_string::assign");
+        size_type copy_count = std::min(other.size() - pos, count);
+        m_storage.set_size(error_policy::check_size(copy_count));
+        traits_type::copy(data(), other.data() + pos, copy_count);
+        return *this;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::assign(const_pointer s, size_type count) -> self_type&
+    {
+        m_storage.set_size(error_policy::check_size(count));
+        traits_type::copy(data(), s, count);
+        return *this;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::assign(const_pointer s) -> self_type&
+    {
+        std::size_t ssize = traits_type::length(s);
+        return assign(s, ssize);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::assign(initializer_type ilist) -> self_type&
+    {
+        return assign(ilist.begin(), ilist.end());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    template <class InputIt>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::assign(InputIt first, InputIt last) -> self_type&
+    {
+        m_storage.set_size(error_policy::check_size(static_cast<size_type>(std::distance(first, last))));
+        std::copy(first, last, data());
+        return *this;
+    }
+
+    // Copy-assign; self-assignment guarded. No policy check is needed:
+    // rhs.size() already fits in the same capacity N.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::assign(const self_type& rhs) -> self_type&
+    {
+        if (this != &rhs)
+        {
+            m_storage.set_size(rhs.size());
+            traits_type::copy(data(), rhs.data(), rhs.size());
+        }
+        return *this;
+    }
+
+    // "Move"-assign: the buffer is fixed, so this is still a copy.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::assign(self_type&& rhs) -> self_type&
+    {
+        if (this != &rhs)
+        {
+            m_storage.set_size(rhs.size());
+            traits_type::copy(data(), rhs.data(), rhs.size());
+        }
+        return *this;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::assign(const string_type& other) -> self_type&
+    {
+        return assign(other.c_str());
+    }
+
+    // NOTE(review): unlike the self_type overload above, this one does not
+    // call check_index_strict on pos; pos > other.size() makes
+    // `other.size() - pos` wrap and reads past the buffer - confirm whether
+    // callers guarantee pos <= other.size().
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::assign(const string_type& other,
+                                                           size_type pos,
+                                                           size_type count) -> self_type&
+    {
+        return assign(other.c_str() + pos, std::min(count, other.size() - pos));
+    }
+
+    /******************
+     * Element access *
+     ******************/
+
+    // at() is bounds-checked through check_index; operator[] is not.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::at(size_type pos) -> reference
+    {
+        check_index(pos, size(), "basic_fixed_string::at");
+        return this->operator[](pos);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::at(size_type pos) const -> const_reference
+    {
+        check_index(pos, size(), "basic_fixed_string::at");
+        return this->operator[](pos);
+    }
+
+    // Unchecked element access, like std::string::operator[].
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator[](size_type pos) -> reference
+    {
+        return data()[pos];
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator[](size_type pos) const -> const_reference
+    {
+        return data()[pos];
+    }
+
+    // front()/back() are unchecked; calling them on an empty string indexes
+    // out of range, as with std::string.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::front() -> reference
+    {
+        return this->operator[](0);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::front() const -> const_reference
+    {
+        return this->operator[](0);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::back() -> reference
+    {
+        return this->operator[](size() - 1);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::back() const -> const_reference
+    {
+        return this->operator[](size() - 1);
+    }
+
+    // data()/c_str() expose the underlying storage buffer directly.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::data() noexcept -> pointer
+    {
+        return m_storage.buffer();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::data() const noexcept -> const_pointer
+    {
+        return m_storage.buffer();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::c_str() const noexcept -> const_pointer
+    {
+        return m_storage.buffer();
+    }
+
+    /*************
+     * Iterators *
+     *************/
+
+    // Iterators are raw pointers into the storage buffer; reverse iterators
+    // wrap them in std::reverse_iterator.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::begin() noexcept -> iterator
+    {
+        return data();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::end() noexcept -> iterator
+    {
+        return data() + size();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::begin() const noexcept -> const_iterator
+    {
+        return cbegin();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::end() const noexcept -> const_iterator
+    {
+        return cend();
+    }
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::cbegin() const noexcept -> const_iterator
+    {
+        return data();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::cend() const noexcept -> const_iterator
+    {
+        return data() + size();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::rbegin() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(end());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::rend() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(begin());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::rbegin() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(end());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::rend() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(begin());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::crbegin() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(end());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::crend() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(begin());
+    }
+
+    /************
+     * Capacity *
+     ************/
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool xbasic_fixed_string<CT, N, ST, EP, TR>::empty() const noexcept
+    {
+        return size() == 0;
+    }
+
+    // size()/length() both report the current stored length.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::size() const noexcept -> size_type
+    {
+        return m_storage.size();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::length() const noexcept -> size_type
+    {
+        return m_storage.size();
+    }
+
+    // Maximum size is the compile-time capacity N.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::max_size() const noexcept -> size_type
+    {
+        return N;
+    }
+
+    /**************
+     * Operations *
+     **************/
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline void xbasic_fixed_string<CT, N, ST, EP, TR>::clear() noexcept
+    {
+        m_storage.set_size(0);
+    }
+
+    // Append one character; growth is validated by the error policy first.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline void xbasic_fixed_string<CT, N, ST, EP, TR>::push_back(value_type ch)
+    {
+        error_policy::check_add(size(), size_type(1));
+        data()[size()] = ch;
+        m_storage.adjust_size(+1);
+    }
+
+    // NOTE(review): no empty() guard - pop_back() on an empty string
+    // shrinks below zero; behavior then depends on m_storage (not in view).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline void xbasic_fixed_string<CT, N, ST, EP, TR>::pop_back()
+    {
+        m_storage.adjust_size(-1);
+    }
+
+    // Substring [pos, pos+count); bounds handling comes from the
+    // (self_type, pos, count) constructor's assign().
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::substr(size_type pos, size_type count) const -> self_type
+    {
+        return self_type(*this, pos, count);
+    }
+
+    // Copy up to count characters starting at pos into dest; returns the
+    // number actually copied.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::copy(pointer dest, size_type count, size_type pos) const -> size_type
+    {
+        check_index_strict(pos, size(), "xbasic_fixed_string::copy");
+        size_type nb_copied = std::min(count, size() - pos);
+        traits_type::copy(dest, data() + pos, nb_copied);
+        return nb_copied;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline void xbasic_fixed_string<CT, N, ST, EP, TR>::resize(size_type count)
+    {
+        resize(count, value_type(' '));  // need to initialize with some value != \0
+    }
+
+    // Grow-or-shrink: only newly exposed characters are filled with ch.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline void xbasic_fixed_string<CT, N, ST, EP, TR>::resize(size_type count, value_type ch)
+    {
+        size_type old_size = size();
+        m_storage.set_size(error_policy::check_size(count));
+        if (old_size < size())
+        {
+            traits_type::assign(data() + old_size, size() - old_size, ch);
+        }
+    }
+
+    // Exchange via three moves (buffers are fixed, so each "move" copies).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline void xbasic_fixed_string<CT, N, ST, EP, TR>::swap(self_type& rhs) noexcept
+    {
+        self_type tmp(std::move(rhs));
+        rhs = std::move(*this);
+        *this = std::move(tmp);
+    }
+
+    /**********
+     * insert *
+     **********/
+
+    // Insert count copies of ch at index: grow (policy-checked), shift the
+    // tail right with copy_backward, then fill the gap.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(size_type index, size_type count, value_type ch) -> self_type&
+    {
+        check_index_strict(index, size(), "xbasic_fixed_string::insert");
+        size_type old_size = size();
+        m_storage.set_size(error_policy::check_add(size(), count));
+        std::copy_backward(data() + index, data() + old_size, end());
+        traits_type::assign(data() + index, count, ch);
+        return *this;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(size_type index, const_pointer s) -> self_type&
+    {
+        return insert(index, s, traits_type::length(s));
+    }
+
+    // Insert a buffer of count characters at index (same shift-then-copy
+    // scheme as above).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(size_type index, const_pointer s, size_type count) -> self_type&
+    {
+        check_index_strict(index, size(), "xbasic_fixed_string::insert");
+        size_type old_size = size();
+        m_storage.set_size(error_policy::check_add(size(), count));
+        std::copy_backward(data() + index, data() + old_size, end());
+        traits_type::copy(data() + index, s, count);
+        return *this;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(size_type index, const self_type& str) -> self_type&
+    {
+        return insert(index, str.data(), str.size());
+    }
+
+    // Insert a substring of another fixed string; index_str is checked,
+    // count is clamped to str's tail.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(size_type index, const self_type& str,
+                                                           size_type index_str, size_type count) -> self_type&
+    {
+        check_index_strict(index_str, str.size(), "xbasic_fixed_string::insert");
+        return insert(index, str.data() + index_str, std::min(count, str.size() - index_str));
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(size_type index, const string_type& str) -> self_type&
+    {
+        return insert(index, str.c_str(), str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(size_type index, const string_type& str,
+                                                           size_type index_str, size_type count) -> self_type&
+    {
+        check_index_strict(index_str, str.size(), "xbasic_fixed_string::insert");
+        return insert(index, str.c_str() + index_str, std::min(count, str.size() - index_str));
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(const_iterator pos, value_type ch) -> iterator
+    {
+        return insert(pos, size_type(1), ch);
+    }
+
+    // NOTE(review): the guard `pos < cend()` excludes pos == cend(), so
+    // iterator-based insertion at the end (or into an empty string) is a
+    // no-op returning end() - diverges from std::string; confirm intended.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(const_iterator pos, size_type count, value_type ch) -> iterator
+    {
+        if (cbegin() <= pos && pos < cend())
+        {
+            size_type index = static_cast<size_type>(pos - cbegin());
+            insert(index, count, ch);
+            return const_cast<iterator>(pos);
+        }
+        return end();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(const_iterator pos, initializer_type ilist) -> iterator
+    {
+        return insert(pos, ilist.begin(), ilist.end());
+    }
+
+    // Range insert; same end-position exclusion as the overload above.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    template <class InputIt>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::insert(const_iterator pos, InputIt first, InputIt last) -> iterator
+    {
+        if (cbegin() <= pos && pos < cend())
+        {
+            size_type index = static_cast<size_type>(pos - cbegin());
+            size_type count = static_cast<size_type>(std::distance(first, last));
+            size_type old_size = size();
+            m_storage.set_size(error_policy::check_add(size(), count));
+            std::copy_backward(data() + index, data() + old_size, end());
+            std::copy(first, last, data() + index);
+            return begin() + index;
+        }
+        return end();
+    }
+
+    /*********
+     * erase *
+     *********/
+
+    // Remove up to count characters starting at index: shift the tail left
+    // and shrink the stored size.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::erase(size_type index, size_type count) -> self_type&
+    {
+        check_index_strict(index, size(), "xbasic_fixed_string::erase");
+        size_type erase_count = std::min(count, size() - index);
+        // cannot use traits_type::copy because of overlapping
+        std::copy(data() + index + erase_count, data() + size(), data() + index);
+        m_storage.adjust_size(-static_cast<std::ptrdiff_t>(erase_count));
+        return *this;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::erase(const_iterator position) -> iterator
+    {
+        return erase(position, position + 1);
+    }
+
+    // Range erase; iterators outside [cbegin(), cend()) make this a no-op
+    // returning end(). last is clamped to cend().
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::erase(const_iterator first, const_iterator last) -> iterator
+    {
+        if (cbegin() <= first && first < cend())
+        {
+            const_iterator adapted_last = std::min(last, cend());
+            size_type erase_count = static_cast<size_type>(adapted_last - first);
+            // cannot use traits_type::copy because of overlapping
+            std::copy(adapted_last, cend(), iterator(first));
+            m_storage.adjust_size(-static_cast<std::ptrdiff_t>(erase_count));
+            return const_cast<iterator>(first);
+        }
+        return end();
+    }
+
+    /**********
+     * append *
+     **********/
+
+    // Appends `count` copies of `ch`; check_add enforces the capacity policy.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::append(size_type count, value_type ch) -> self_type&
+    {
+        size_type old_size = m_storage.size();
+        m_storage.set_size(error_policy::check_add(size(), count));
+        traits_type::assign(data() + old_size, count, ch);
+        return *this;
+    }
+
+    // Appends another fixed string.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::append(const self_type& str) -> self_type&
+    {
+        return append(str.data(), str.size());
+    }
+
+    // Appends the substring str[pos, pos + count); `count` is clamped to the
+    // available tail of `str`.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::append(const self_type& str,
+                                                           size_type pos, size_type count) -> self_type&
+    {
+        check_index_strict(pos, str.size(), "xbasic_fixed_string::append");
+        return append(str.data() + pos, std::min(count, str.size() - pos));
+    }
+
+    // Appends a std::basic_string.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::append(const string_type& str) -> self_type&
+    {
+        return append(str.c_str(), str.size());
+    }
+
+    // Appends a clamped substring of a std::basic_string.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::append(const string_type& str,
+                                                           size_type pos, size_type count) -> self_type&
+    {
+        check_index_strict(pos, str.size(), "xbasic_fixed_string::append");
+        return append(str.c_str() + pos, std::min(count, str.size() - pos));
+    }
+
+    // Core append: copies `count` characters from `s` past the current end.
+    // All other append overloads funnel into this one (or the fill overload).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::append(const_pointer s, size_type count) -> self_type&
+    {
+        size_type old_size = m_storage.size();
+        m_storage.set_size(error_policy::check_add(size(), count));
+        traits_type::copy(data() + old_size, s, count);
+        return *this;
+    }
+
+    // Appends a null-terminated character array.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::append(const_pointer s) -> self_type&
+    {
+        return append(s, traits_type::length(s));
+    }
+
+    // Appends the characters of an initializer list.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::append(initializer_type ilist) -> self_type&
+    {
+        return append(ilist.begin(), ilist.end());
+    }
+
+    // Appends the range [first, last); uses std::copy (not traits_type::copy)
+    // because the source is an arbitrary iterator range.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    template <class InputIt>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::append(InputIt first, InputIt last) -> self_type&
+    {
+        size_type count = static_cast<size_type>(std::distance(first, last));
+        size_type old_size = m_storage.size();
+        m_storage.set_size(error_policy::check_add(size(), count));
+        std::copy(first, last, data() + old_size);
+        return *this;
+    }
+
+    // operator+= overloads: thin sugar over the append() family.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator+=(const self_type& str) -> self_type&
+    {
+        return append(str);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator+=(const string_type& str) -> self_type&
+    {
+        return append(str);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator+=(value_type ch) -> self_type&
+    {
+        return append(size_type(1), ch);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator+=(const_pointer s) -> self_type&
+    {
+        return append(s);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::operator+=(initializer_type ilist) -> self_type&
+    {
+        return append(ilist);
+    }
+
+    /***********
+     * compare *
+     ***********/
+
+    // Three-way comparison with another fixed string (std::string-like: <0, 0, >0).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline int xbasic_fixed_string<CT, N, ST, EP, TR>::compare(const self_type& str) const noexcept
+    {
+        return compare_impl(data(), size(), str.data(), str.size());
+    }
+
+    // Compares the substring [pos1, pos1 + count1) (clamped) with `str`.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline int xbasic_fixed_string<CT, N, ST, EP, TR>::compare(size_type pos1, size_type count1, const self_type& str) const
+    {
+        check_index_strict(pos1, size(), "xbasic_fixed_string::compare");
+        return compare_impl(data() + pos1, std::min(count1, size() - pos1), str.data(), str.size());
+    }
+
+    // Compares clamped substrings of both strings.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline int xbasic_fixed_string<CT, N, ST, EP, TR>::compare(size_type pos1, size_type count1, const self_type& str,
+                                                           size_type pos2, size_type count2) const
+    {
+        check_index_strict(pos1, size(), "xbasic_fixed_string::compare");
+        check_index_strict(pos2, str.size(), "xbasic_fixed_string::compare");
+        return compare_impl(data() + pos1, std::min(count1, size() - pos1),
+                            str.data() + pos2, std::min(count2, str.size() - pos2));
+    }
+
+    // Same comparison family against std::basic_string operands.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline int xbasic_fixed_string<CT, N, ST, EP, TR>::compare(const string_type& str) const noexcept
+    {
+        return compare_impl(data(), size(), str.data(), str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline int xbasic_fixed_string<CT, N, ST, EP, TR>::compare(size_type pos1, size_type count1, const string_type& str) const
+    {
+        check_index_strict(pos1, size(), "xbasic_fixed_string::compare");
+        return compare_impl(data() + pos1, std::min(count1, size() - pos1), str.data(), str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline int xbasic_fixed_string<CT, N, ST, EP, TR>::compare(size_type pos1, size_type count1, const string_type& str,
+                                                           size_type pos2, size_type count2) const
+    {
+        check_index_strict(pos1, size(), "xbasic_fixed_string::compare");
+        check_index_strict(pos2, str.size(), "xbasic_fixed_string::compare");
+        return compare_impl(data() + pos1, std::min(count1, size() - pos1),
+                            str.data() + pos2, std::min(count2, str.size() - pos2));
+    }
+
+    // Same comparison family against null-terminated character arrays.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline int xbasic_fixed_string<CT, N, ST, EP, TR>::compare(const_pointer s) const noexcept
+    {
+        return compare_impl(data(), size(), s, traits_type::length(s));
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    int xbasic_fixed_string<CT, N, ST, EP, TR>::compare(size_type pos1, size_type count1, const_pointer s) const
+    {
+        return compare(pos1, count1, s, traits_type::length(s));
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    int xbasic_fixed_string<CT, N, ST, EP, TR>::compare(size_type pos1, size_type count1, const_pointer s, size_type count2) const
+    {
+        check_index_strict(pos1, size(), "xbasic_fixed_string::compare");
+        return compare_impl(data() + pos1, std::min(count1, size() - pos1),
+                            s, count2);
+    }
+
+    /***********
+     * replace *
+     ***********/
+
+    // Forwarding replace overloads: all funnel into the (pos, count, cstr, count2)
+    // core implementation below.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(size_type pos, size_type count, const self_type& str) -> self_type&
+    {
+        return replace(pos, count, str.data(), str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(const_iterator first, const_iterator last,
+                                                            const self_type& str) -> self_type&
+    {
+        return replace(first, last, str.data(), str.size());
+    }
+
+    // Replaces with the clamped substring str[pos2, pos2 + count2).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(size_type pos1, size_type count1, const self_type& str,
+                                                            size_type pos2, size_type count2) -> self_type&
+    {
+        check_index_strict(pos2, str.size(), "xbasic_fixed_string::replace");
+        return replace(pos1, count1, str.data() + pos2, std::min(count2, str.size() - pos2));
+    }
+
+    // Same forwarders for std::basic_string operands.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(size_type pos, size_type count, const string_type& str) -> self_type&
+    {
+        return replace(pos, count, str.data(), str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(const_iterator first, const_iterator last,
+                                                            const string_type& str) -> self_type&
+    {
+        return replace(first, last, str.data(), str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(size_type pos1, size_type count1, const string_type& str,
+                                                            size_type pos2, size_type count2) -> self_type&
+    {
+        check_index_strict(pos2, str.size(), "xbasic_fixed_string::replace");
+        return replace(pos1, count1, str.data() + pos2, std::min(count2, str.size() - pos2));
+    }
+
+    // Core replace: substitutes this[pos, pos + count) (clamped) with the first
+    // count2 characters of `cstr`. Three cases keep the tail intact:
+    //  - shrinking: write replacement, then pull the tail left;
+    //  - growing:   push the tail right first, then write the replacement;
+    //  - same size: overwrite in place, size unchanged.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(size_type pos, size_type count,
+                                                     const_pointer cstr, size_type count2) -> self_type&
+    {
+        check_index_strict(pos, size(), "xbasic_fixed_string::replace");
+        size_type erase_count = std::min(count, size() - pos);
+        // check_add enforces the fixed-capacity error policy on the new size.
+        size_type new_size = error_policy::check_add(size() - erase_count, count2);
+        if (erase_count > count2)
+        {
+            traits_type::copy(data() + pos, cstr, count2);
+            std::copy(cbegin() + pos + erase_count, cend(), data() + pos + count2);
+            m_storage.set_size(new_size);
+        }
+        else if (erase_count < count2)
+        {
+            std::copy_backward(cbegin() + pos + erase_count, cend(), data() + new_size);
+            traits_type::copy(data() + pos, cstr, count2);
+            m_storage.set_size(new_size);
+        }
+        else
+        {
+            traits_type::copy(data() + pos, cstr, count2);
+        }
+        return *this;
+    }
+
+    // Replaces [first, last) with the first count2 characters of `cstr`.
+    // An empty range (first == last) is accepted and behaves as a pure
+    // insertion, matching std::basic_string::replace; the previous condition
+    // (`first < last`) silently ignored that case. An invalid range is a no-op.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(const_iterator first, const_iterator last,
+                                                            const_pointer cstr, size_type count2) -> self_type&
+    {
+        if (cbegin() <= first && first <= last && last <= cend())
+        {
+            size_type pos = static_cast<size_type>(first - cbegin());
+            size_type count = static_cast<size_type>(last - first);
+            return replace(pos, count, cstr, count2);
+        }
+        return *this;
+    }
+
+    // Null-terminated-pointer forwarders: length is derived via traits_type.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(size_type pos, size_type count,
+                                                            const_pointer cstr) -> self_type&
+    {
+        return replace(pos, count, cstr, traits_type::length(cstr));
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(const_iterator first, const_iterator last,
+                                                            const_pointer cstr) -> self_type&
+    {
+        return replace(first, last, cstr, traits_type::length(cstr));
+    }
+
+    // Fill variant of the core replace: substitutes this[pos, pos + count)
+    // (clamped) with count2 copies of `ch`. Mirrors the pointer-based core:
+    // shrink (write, pull tail left), grow (push tail right, write), or
+    // overwrite in place when the sizes match.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(size_type pos, size_type count,
+                                                            size_type count2, value_type ch) -> self_type&
+    {
+        check_index_strict(pos, size(), "xbasic_fixed_string::replace");
+        size_type erase_count = std::min(count, size() - pos);
+        size_type new_size = error_policy::check_add(size() - erase_count, count2);
+        if (erase_count > count2)
+        {
+            traits_type::assign(data() + pos, count2, ch);
+            std::copy(cbegin() + pos + erase_count, cend(), data() + pos + count2);
+            m_storage.set_size(new_size);
+        }
+        else if (erase_count < count2)
+        {
+            std::copy_backward(cbegin() + pos + erase_count, cend(), data() + new_size);
+            traits_type::assign(data() + pos, count2, ch);
+            m_storage.set_size(new_size);
+        }
+        else
+        {
+            traits_type::assign(data() + pos, count2, ch);
+        }
+        return *this;
+    }
+
+    // Replaces [first, last) with count2 copies of `ch`. An empty range
+    // (first == last) is accepted as a pure insertion, matching
+    // std::basic_string::replace; the previous condition (`first < last`)
+    // silently ignored that case. An invalid range is a no-op.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(const_iterator first, const_iterator last,
+                                                            size_type count2, value_type ch) -> self_type&
+    {
+        if (cbegin() <= first && first <= last && last <= cend())
+        {
+            size_type pos = static_cast<size_type>(first - cbegin());
+            size_type count = static_cast<size_type>(last - first);
+            return replace(pos, count, count2, ch);
+        }
+        return *this;
+    }
+
+    // Replaces [first, last) with the characters of an initializer list.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(const_iterator first, const_iterator last,
+                                                            initializer_type ilist) -> self_type&
+    {
+        return replace(first, last, ilist.begin(), ilist.end());
+    }
+
+    // Replaces [first, last) with the iterator range [first2, last2). An empty
+    // replaced range (first == last) is accepted as a pure insertion, matching
+    // std::basic_string::replace; the previous condition (`first < last`)
+    // silently ignored that case. An invalid range is a no-op. The three-way
+    // shrink/grow/in-place logic is unchanged from the original.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    template <class InputIt>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::replace(const_iterator first, const_iterator last,
+                                                            InputIt first2, InputIt last2) -> self_type&
+    {
+        if (cbegin() <= first && first <= last && last <= cend())
+        {
+            size_type pos = static_cast<size_type>(first - cbegin());
+            size_type erase_count = static_cast<size_type>(last - first);
+            size_type count2 = static_cast<size_type>(std::distance(first2, last2));
+            // check_add enforces the fixed-capacity error policy on the new size.
+            size_type new_size = error_policy::check_add(size() - erase_count, count2);
+            if (erase_count > count2)
+            {
+                std::copy(first2, last2, data() + pos);
+                std::copy(cbegin() + pos + erase_count, cend(), data() + pos + count2);
+                m_storage.set_size(new_size);
+            }
+            else if (erase_count < count2)
+            {
+                std::copy_backward(cbegin() + pos + erase_count, cend(), data() + new_size);
+                std::copy(first2, last2, data() + pos);
+                m_storage.set_size(new_size);
+            }
+            else
+            {
+                std::copy(first2, last2, data() + pos);
+            }
+        }
+        return *this;
+    }
+
+    /********
+     * find *
+     ********/
+
+    // Forward substring search; returns the index of the first occurrence at or
+    // after `pos`, or npos.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find(const self_type& str, size_type pos) const noexcept -> size_type
+    {
+        return find(str.data(), pos, str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find(const string_type& str, size_type pos) const noexcept -> size_type
+    {
+        return find(str.data(), pos, str.size());
+    }
+
+    // Core forward search for the `count`-character needle `s`.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::find(const_pointer s, size_type pos, size_type count) const -> size_type
+    {
+        // The empty needle matches at any valid position (std::string behavior).
+        if (count == size_type(0) && pos <= size())
+        {
+            return pos;
+        }
+
+        size_type nm;
+        if (pos < size() && count <= (nm = size() - pos))
+        {
+            // Scan: jump to each occurrence of the needle's first character
+            // (traits_type::find returns null when none remains), then verify
+            // the full needle with traits_type::compare.
+            const_pointer uptr, vptr;
+            for (nm -= count - 1, vptr = data() + pos;
+                 (uptr = traits_type::find(vptr, nm, *s)) != 0;
+                 nm -= size_type(uptr - vptr) + 1ul, vptr = uptr + 1ul)
+            {
+                if (traits_type::compare(uptr, s, count) == 0)
+                {
+                    return size_type(uptr - data());
+                }
+            }
+        }
+        return npos;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find(const_pointer s, size_type pos) const -> size_type
+    {
+        return find(s, pos, traits_type::length(s));
+    }
+
+    // Single-character search, expressed as a length-1 needle.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find(value_type ch, size_type pos) const -> size_type
+    {
+        return find((const_pointer)(&ch), pos, size_type(1));
+    }
+
+    /*********
+     * rfind *
+     *********/
+
+    // Backward substring search; returns the index of the last occurrence whose
+    // start is <= pos, or npos.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::rfind(const self_type& str, size_type pos) const noexcept -> size_type
+    {
+        return rfind(str.data(), pos, str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::rfind(const string_type& str, size_type pos) const noexcept -> size_type
+    {
+        return rfind(str.data(), pos, str.size());
+    }
+
+    // Core backward search for the `count`-character needle `s`.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::rfind(const_pointer s, size_type pos, size_type count) const -> size_type
+    {
+        // Empty needle: matches at min(pos, size()) (std::string behavior).
+        if (count == 0)
+        {
+            return std::min(pos, size());
+        }
+
+        if (count <= size())
+        {
+            // Walk backwards from the last feasible start position; the
+            // uptr == data() check terminates after testing index 0.
+            const_pointer uptr = data() + std::min(pos, size() - count);
+            for (;; --uptr)
+            {
+                if (traits_type::eq(*uptr, *s) && traits_type::compare(uptr, s, count) == 0)
+                {
+                    return size_type(uptr - data());
+                }
+                else if (uptr == data())
+                {
+                    break;
+                }
+            }
+        }
+        return npos;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::rfind(const_pointer s, size_type pos) const -> size_type
+    {
+        return rfind(s, pos, traits_type::length(s));
+    }
+
+    // Single-character backward search, expressed as a length-1 needle.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::rfind(value_type ch, size_type pos) const -> size_type
+    {
+        return rfind((const_pointer)(&ch), pos, size_type(1));
+    }
+
+    /*****************
+     * find_first_of *
+     *****************/
+
+    // Returns the index of the first character at or after `pos` that belongs
+    // to the given character set, or npos.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_first_of(const self_type& str, size_type pos) const noexcept -> size_type
+    {
+        return find_first_of(str.data(), pos, str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_first_of(const string_type& str, size_type pos) const noexcept -> size_type
+    {
+        return find_first_of(str.data(), pos, str.size());
+    }
+
+    // Core scan: for each character from pos onward, test membership in the
+    // `count`-character set `s` via traits_type::find (null means not found).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_first_of(const_pointer s, size_type pos, size_type count) const -> size_type
+    {
+        if (size_type(0) < count && pos < size())
+        {
+            const_pointer vptr = data() + size();
+            for (const_pointer uptr = data() + pos; uptr < vptr; ++uptr)
+            {
+                if (traits_type::find(s, count, *uptr) != 0)
+                {
+                    return size_type(uptr - data());
+                }
+            }
+        }
+        return npos;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_first_of(const_pointer s, size_type pos) const -> size_type
+    {
+        return find_first_of(s, pos, traits_type::length(s));
+    }
+
+    // Single-character set, expressed as a length-1 set.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_first_of(value_type ch, size_type pos) const -> size_type
+    {
+        return find_first_of((const_pointer)(&ch), pos, size_type(1));
+    }
+
+    /*********************
+     * find_first_not_of *
+     *********************/
+
+    // Returns the index of the first character at or after `pos` that does NOT
+    // belong to the given character set, or npos.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_first_not_of(const self_type& str, size_type pos) const noexcept -> size_type
+    {
+        return find_first_not_of(str.data(), pos, str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_first_not_of(const string_type& str, size_type pos) const noexcept -> size_type
+    {
+        return find_first_not_of(str.data(), pos, str.size());
+    }
+
+    // Core scan: membership test is inverted relative to find_first_of
+    // (traits_type::find returning null means the character is NOT in the set).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_first_not_of(const_pointer s, size_type pos, size_type count) const -> size_type
+    {
+        if (pos < size())
+        {
+            const_pointer vptr = data() + size();
+            for (const_pointer uptr = data() + pos; uptr < vptr; ++uptr)
+            {
+                if (traits_type::find(s, count, *uptr) == 0)
+                {
+                    return size_type(uptr - data());
+                }
+            }
+        }
+        return npos;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_first_not_of(const_pointer s, size_type pos) const -> size_type
+    {
+        return find_first_not_of(s, pos, traits_type::length(s));
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_first_not_of(value_type ch, size_type pos) const -> size_type
+    {
+        return find_first_not_of((const_pointer)(&ch), pos, size_type(1));
+    }
+
+    /****************
+     * find_last_of *
+     ****************/
+
+    // Returns the index of the last character at or before `pos` that belongs
+    // to the given character set, or npos.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_last_of(const self_type& str, size_type pos) const noexcept -> size_type
+    {
+        return find_last_of(str.data(), pos, str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_last_of(const string_type& str, size_type pos) const noexcept -> size_type
+    {
+        return find_last_of(str.data(), pos, str.size());
+    }
+
+    // Core backward scan; the uptr == data() check terminates after testing
+    // index 0, avoiding pointer underflow.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_last_of(const_pointer s, size_type pos, size_type count) const -> size_type
+    {
+        if (size_type(0) < count && size_type(0) < size())
+        {
+            const_pointer uptr = data() + std::min(pos, size() - 1);
+            for (;; --uptr)
+            {
+                if (traits_type::find(s, count, *uptr) != 0)
+                {
+                    return size_type(uptr - data());
+                }
+                else if (uptr == data())
+                {
+                    break;
+                }
+            }
+        }
+        return npos;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_last_of(const_pointer s, size_type pos) const -> size_type
+    {
+        return find_last_of(s, pos, traits_type::length(s));
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_last_of(value_type ch, size_type pos) const -> size_type
+    {
+        return find_last_of((const_pointer)(&ch), pos, size_type(1));
+    }
+
+    /********************
+     * find_last_not_of *
+     ********************/
+
+    // Returns the index of the last character at or before `pos` that does NOT
+    // belong to the given character set, or npos.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_last_not_of(const self_type& str, size_type pos) const noexcept -> size_type
+    {
+        return find_last_not_of(str.data(), pos, str.size());
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_last_not_of(const string_type& str, size_type pos) const noexcept -> size_type
+    {
+        return find_last_not_of(str.data(), pos, str.size());
+    }
+
+    // Core backward scan with inverted membership test (null from
+    // traits_type::find means the character is NOT in the set).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_last_not_of(const_pointer s, size_type pos, size_type count) const -> size_type
+    {
+        if (size_type(0) < size())
+        {
+            const_pointer uptr = data() + std::min(pos, size() - 1);
+            for (;; --uptr)
+            {
+                if (traits_type::find(s, count, *uptr) == 0)
+                {
+                    return size_type(uptr - data());
+                }
+                else if (uptr == data())
+                {
+                    break;
+                }
+            }
+        }
+        return npos;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_last_not_of(const_pointer s, size_type pos) const -> size_type
+    {
+        return find_last_not_of(s, pos, traits_type::length(s));
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline auto xbasic_fixed_string<CT, N, ST, EP, TR>::find_last_not_of(value_type ch, size_type pos) const -> size_type
+    {
+        return find_last_not_of((const_pointer)(&ch), pos, size_type(1));
+    }
+
+    /*******************
+     * Private methods *
+     *******************/
+
+    // Lexicographic three-way comparison of two character ranges: compares the
+    // common prefix with traits_type::compare, then breaks ties by length
+    // (shorter range sorts first), mirroring std::basic_string::compare.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    int xbasic_fixed_string<CT, N, ST, EP, TR>::compare_impl(const_pointer s1, size_type count1,
+                                                         const_pointer s2, size_type count2) const noexcept
+    {
+        size_type rlen = std::min(count1, count2);
+        int res = traits_type::compare(s1, s2, rlen);
+        if (res == 0)
+        {
+            return count1 < count2 ? -1 : (count1 > count2 ? 1 : 0);
+        }
+        else
+        {
+            return res;
+        }
+    }
+
+    // Writes the terminating null after the last character so data()/c_str()
+    // always yield a null-terminated buffer.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline void xbasic_fixed_string<CT, N, ST, EP, TR>::update_null_termination() noexcept
+    {
+        data()[size()] = '\0';
+    }
+
+    // Bounds check: rejects pos >= size. Throws std::out_of_range, or prints
+    // `what` and terminates when exceptions are disabled (XTL_NO_EXCEPTIONS).
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    void xbasic_fixed_string<CT, N, ST, EP, TR>::check_index(size_type pos, size_type size, const char* what) const
+    {
+        if (pos >= size)
+        {
+#if defined(XTL_NO_EXCEPTIONS)
+            std::fprintf(stderr, "%s\n", what);
+            std::terminate();
+#else
+            throw std::out_of_range(what);
+#endif
+        }
+    }
+
+    // Like check_index but additionally allows pos == size (the one-past-the-end
+    // position), as required by insert/append/compare/replace.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    void xbasic_fixed_string<CT, N, ST, EP, TR>::check_index_strict(size_type pos, size_type size, const char* what) const
+    {
+        check_index(pos, size + 1, what);
+    }
+
+    /**************************
+     * Concatenation operator *
+     **************************/
+
+    // Free concatenation operators: each builds a copy of the left operand (or
+    // a one-character string for a char left operand) and appends via operator+=.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+              const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs)
+    {
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(lhs);
+        return res += rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+              const CT* rhs)
+    {
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(lhs);
+        return res += rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+              CT rhs)
+    {
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(lhs);
+        return res += rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(const CT* lhs,
+              const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs)
+    {
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(lhs);
+        return res += rhs;
+    }
+
+    // char + string: seed the result with a single copy of `lhs`.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(CT lhs,
+              const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs)
+    {
+        using size_type = typename xbasic_fixed_string<CT, N, ST, EP, TR>::size_type;
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(size_type(1), lhs);
+        return res += rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>&& lhs,
+              const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs)
+    {
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(std::move(lhs));
+        return res += rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+              const xbasic_fixed_string<CT, N, ST, EP, TR>&& rhs)
+    {
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(lhs);
+        return res += std::move(rhs);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>&& lhs,
+              const xbasic_fixed_string<CT, N, ST, EP, TR>&& rhs)
+    {
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(std::move(lhs));
+        return res += std::move(rhs);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>&& lhs,
+              const CT* rhs)
+    {
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(std::move(lhs));
+        return res += rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(const xbasic_fixed_string<CT, N, ST, EP, TR>&& lhs,
+              CT rhs)
+    {
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(std::move(lhs));
+        return res += rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(const CT* lhs,
+              const xbasic_fixed_string<CT, N, ST, EP, TR>&& rhs)
+    {
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(lhs);
+        return res += std::move(rhs);
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline xbasic_fixed_string<CT, N, ST, EP, TR>
+    operator+(CT lhs,
+              const xbasic_fixed_string<CT, N, ST, EP, TR>&& rhs)
+    {
+        using size_type = typename xbasic_fixed_string<CT, N, ST, EP, TR>::size_type;
+        xbasic_fixed_string<CT, N, ST, EP, TR> res(size_type(1), lhs);
+        return res += std::move(rhs);
+    }
+
+    /************************
+     * Comparison operators *
+     ************************/
+
+    // All comparisons reduce to compare(): the fixed/fixed and fixed/C-string
+    // overloads call compare() directly; the C-string/fixed overloads forward
+    // to the mirrored operator with operands swapped; the std::basic_string
+    // overloads convert via c_str() and reuse the C-string overloads.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator==(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.compare(rhs) == 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator==(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const CT* rhs) noexcept
+    {
+        return lhs.compare(rhs) == 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator==(const CT* lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return rhs == lhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator==(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const std::basic_string<CT, TR>& rhs) noexcept
+    {
+        return lhs == rhs.c_str();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator==(const std::basic_string<CT, TR>& lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.c_str() == rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator!=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.compare(rhs) != 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator!=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const CT* rhs) noexcept
+    {
+        return lhs.compare(rhs) != 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator!=(const CT* lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return rhs != lhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator!=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const std::basic_string<CT, TR>& rhs) noexcept
+    {
+        return lhs != rhs.c_str();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator!=(const std::basic_string<CT, TR>& lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.c_str() != rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator<(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                          const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.compare(rhs) < 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator<(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                          const CT* rhs) noexcept
+    {
+        return lhs.compare(rhs) < 0;
+    }
+
+    // Swapped operands: lhs < rhs is rhs > lhs.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator<(const CT* lhs,
+                          const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return rhs > lhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator<(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                          const std::basic_string<CT, TR>& rhs) noexcept
+    {
+        return lhs < rhs.c_str();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator<(const std::basic_string<CT, TR>& lhs,
+                          const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.c_str() < rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator<=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.compare(rhs) <= 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator<=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const CT* rhs) noexcept
+    {
+        return lhs.compare(rhs) <= 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator<=(const CT* lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return rhs >= lhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator<=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const std::basic_string<CT, TR>& rhs) noexcept
+    {
+        return lhs <= rhs.c_str();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator<=(const std::basic_string<CT, TR>& lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.c_str() <= rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator>(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                          const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.compare(rhs) > 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator>(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                          const CT* rhs) noexcept
+    {
+        return lhs.compare(rhs) > 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator>(const CT* lhs,
+                          const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return rhs < lhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator>(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                          const std::basic_string<CT, TR>& rhs) noexcept
+    {
+        return lhs > rhs.c_str();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator>(const std::basic_string<CT, TR>& lhs,
+                          const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.c_str() > rhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator>=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.compare(rhs) >= 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator>=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const CT* rhs) noexcept
+    {
+        return lhs.compare(rhs) >= 0;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator>=(const CT* lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return rhs <= lhs;
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator>=(const xbasic_fixed_string<CT, N, ST, EP, TR>& lhs,
+                           const std::basic_string<CT, TR>& rhs) noexcept
+    {
+        return lhs >= rhs.c_str();
+    }
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline bool operator>=(const std::basic_string<CT, TR>& lhs,
+                           const xbasic_fixed_string<CT, N, ST, EP, TR>& rhs) noexcept
+    {
+        return lhs.c_str() >= rhs;
+    }
+
+    // Non-member swap so that the ADL "using std::swap; swap(a, b);" idiom
+    // picks up the member implementation.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline void swap(xbasic_fixed_string<CT, N, ST, EP, TR>& lhs, xbasic_fixed_string<CT, N, ST, EP, TR>& rhs)
+    {
+        lhs.swap(rhs);
+    }
+
+    /******************
+     * Input / output *
+     ******************/
+
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline std::basic_ostream<CT, TR>& operator<<(std::basic_ostream<CT, TR>& os,
+                                                  const xbasic_fixed_string<CT, N, ST, EP, TR>& str)
+    {
+        // Insert the null-terminated buffer and hand the stream back so
+        // insertions can be chained.
+        return os << str.c_str();
+    }
+
+#ifdef __CLING__
+    // Rich MIME representation for the cling interpreter (xeus-cling): renders
+    // the fixed string as plain text in Jupyter front ends.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    nlohmann::json mime_bundle_repr(const xbasic_fixed_string<CT, N, ST, EP, TR>& str)
+    {
+        auto bundle = nlohmann::json::object();
+        bundle["text/plain"] = str.c_str();
+        return bundle;
+    }
+#endif
+
+    // Extraction operator: reads one whitespace-delimited token, then copies
+    // it into the fixed string.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline std::basic_istream<CT, TR>& operator>>(std::basic_istream<CT, TR>& is,
+                                                  xbasic_fixed_string<CT, N, ST, EP, TR>& str)
+    {
+        // Not optimal: reads into a heap-allocating std::string first and then
+        // copies into the fixed buffer.
+        std::string tmp;
+        is >> tmp;
+        str = tmp.c_str();
+        return is;
+    }
+
+    // getline overloads mirroring std::getline for std::basic_string: read a
+    // line into a temporary std::string, assign it to the fixed string and
+    // return the stream so the result can be tested or chained.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline std::basic_istream<CT, TR>& getline(std::basic_istream<CT, TR>& input,
+                                               xbasic_fixed_string<CT, N, ST, EP, TR>& str,
+                                               CT delim)
+    {
+        std::string tmp;
+        auto& ret = std::getline(input, tmp, delim);
+        str = tmp;
+        return ret;
+    }
+
+    // rvalue stream, explicit delimiter.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline std::basic_istream<CT, TR>& getline(std::basic_istream<CT, TR>&& input,
+                                               xbasic_fixed_string<CT, N, ST, EP, TR>& str,
+                                               CT delim)
+    {
+        std::string tmp;
+        auto& ret = std::getline(std::move(input), tmp, delim);
+        str = tmp;
+        return ret;
+    }
+
+    // lvalue stream, '\n' delimiter.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline std::basic_istream<CT, TR>& getline(std::basic_istream<CT, TR>& input,
+                                               xbasic_fixed_string<CT, N, ST, EP, TR>& str)
+    {
+        std::string tmp;
+        auto& ret = std::getline(input, tmp);
+        str = tmp;
+        return ret;
+    }
+
+    // rvalue stream, '\n' delimiter.
+    template <class CT, std::size_t N, int ST, template <std::size_t> class EP, class TR>
+    inline std::basic_istream<CT, TR>& getline(std::basic_istream<CT, TR>&& input,
+                                               xbasic_fixed_string<CT, N, ST, EP, TR>& str)
+    {
+        std::string tmp;
+        auto& ret = std::getline(std::move(input), tmp);
+        str = tmp;
+        return ret;
+    }
+}
+
+#endif  // xtl

+ 435 - 0
3rd/numpy/include/xtl/xclosure.hpp

@@ -0,0 +1,435 @@
+/***************************************************************************
+* Copyright (c) Sylvain Corlay and Johan Mabille and Wolf Vollprecht       *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_CLOSURE_HPP
+#define XTL_CLOSURE_HPP
+
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+#include "xtl_config.hpp"
+
+namespace xtl
+{
+
+#ifdef __cpp_lib_as_const
+    using std::as_const;
+#else
+    // Fallback for pre-C++17 libraries without std::as_const: view an lvalue
+    // through a reference-to-const.
+    template <class T>
+    constexpr std::add_const_t<T>& as_const(T& t) noexcept
+    {
+        return t;
+    }
+
+    // Deleted for rvalues: a const reference to a temporary would dangle.
+    template <class T>
+    constexpr std::add_const_t<T&&>& as_const(T&& t) noexcept = delete;
+#endif
+
+    /****************
+     * closure_type *
+     ****************/
+
+    // Computes the storage ("closure") type for an expression of type S:
+    // lvalues are held by (possibly const) reference, rvalues by value.
+    template <class S>
+    struct closure_type
+    {
+        using underlying_type = std::conditional_t<std::is_const<std::remove_reference_t<S>>::value,
+                                                   const std::decay_t<S>,
+                                                   std::decay_t<S>>;
+        using type = typename std::conditional<std::is_lvalue_reference<S>::value,
+                                               underlying_type&,
+                                               underlying_type>::type;
+    };
+
+    // Convenience alias for closure_type<S>::type.
+    template <class S>
+    using closure_type_t = typename closure_type<S>::type;
+
+    // Like closure_type, but the lvalue (reference) case is always
+    // const-qualified, yielding a read-only closure.
+    template <class S>
+    struct const_closure_type
+    {
+        using underlying_type = std::decay_t<S>;
+        using type = typename std::conditional<std::is_lvalue_reference<S>::value,
+                                               std::add_const_t<underlying_type>&,
+                                               underlying_type>::type;
+    };
+
+    // Convenience alias for const_closure_type<S>::type.
+    template <class S>
+    using const_closure_type_t = typename const_closure_type<S>::type;
+
+    /********************
+     * ptr_closure_type *
+     ********************/
+
+    // Pointer-based variant of closure_type: lvalues are stored as (possibly
+    // const) pointers instead of references, which keeps the holder
+    // copy-assignable (references are not rebindable).
+    template <class S>
+    struct ptr_closure_type
+    {
+        using underlying_type = std::conditional_t<std::is_const<std::remove_reference_t<S>>::value,
+                                                   const std::decay_t<S>,
+                                                   std::decay_t<S>>;
+        using type = std::conditional_t<std::is_lvalue_reference<S>::value,
+                                        underlying_type*,
+                                        underlying_type>;
+    };
+
+    template <class S>
+    using ptr_closure_type_t = typename ptr_closure_type<S>::type;
+
+    // Pointer-based variant of const_closure_type: always points to const.
+    template <class S>
+    struct const_ptr_closure_type
+    {
+        using underlying_type = const std::decay_t<S>;
+        using type = std::conditional_t<std::is_lvalue_reference<S>::value,
+                                        underlying_type*,
+                                        underlying_type>;
+    };
+
+    template <class S>
+    using const_ptr_closure_type_t = typename const_ptr_closure_type<S>::type;
+
+    /********************
+     * xclosure_wrapper *
+     ********************/
+
+    // Wrapper around a closure type CT: stores a value when CT is a value
+    // type and a pointer when CT is an lvalue reference (see ptr_closure_type),
+    // while exposing uniform reference-like access. Unlike
+    // std::reference_wrapper it assigns through to the wrappee and supports
+    // equality comparison and swap.
+    template <class CT>
+    class xclosure_wrapper
+    {
+    public:
+
+        using self_type = xclosure_wrapper<CT>;
+        using closure_type = CT;
+        using const_closure_type = std::add_const_t<CT>;
+        using value_type = std::decay_t<CT>;
+
+        // reference / pointer preserve the constness of the wrapped closure.
+        using reference = std::conditional_t<
+            std::is_const<std::remove_reference_t<CT>>::value,
+            const value_type&, value_type&
+        >;
+
+        using pointer = std::conditional_t<
+            std::is_const<std::remove_reference_t<CT>>::value,
+            const value_type*, value_type*
+        >;
+
+        xclosure_wrapper(value_type&& e);
+        xclosure_wrapper(reference e);
+
+        xclosure_wrapper(const self_type& rhs) = default;
+        xclosure_wrapper(self_type&& rhs) = default;
+
+        // Assignment forwards to the wrapped object (assign-through).
+        self_type& operator=(const self_type& rhs);
+        self_type& operator=(self_type&& rhs);
+
+        template <class T>
+        self_type& operator=(T&&);
+
+        // Implicit conversions back to the closure type.
+        operator closure_type() noexcept;
+        operator const_closure_type() const noexcept;
+
+        std::add_lvalue_reference_t<closure_type> get() & noexcept;
+        std::add_lvalue_reference_t<std::add_const_t<closure_type>> get() const & noexcept;
+        closure_type get() && noexcept;
+
+        // Address of the wrapped object, not of the wrapper.
+        pointer operator&() noexcept;
+
+        bool equal(const self_type& rhs) const;
+        void swap(self_type& rhs);
+
+    private:
+
+        // Pointer for lvalue closures, plain value otherwise.
+        using storing_type = ptr_closure_type_t<CT>;
+        storing_type m_wrappee;
+
+        // deref/get_pointer/get_storage_init dispatch on whether CT is an
+        // lvalue reference (pointer storage) or a value (direct storage).
+        template <class T>
+        std::enable_if_t<std::is_lvalue_reference<CT>::value, std::add_lvalue_reference_t<std::remove_pointer_t<T>>>
+        deref(T val) const;
+
+        template <class T>
+        std::enable_if_t<!std::is_lvalue_reference<CT>::value, std::add_lvalue_reference_t<T>>
+        deref(T& val) const;
+
+        template <class T>
+        std::enable_if_t<std::is_lvalue_reference<CT>::value, T>
+        get_pointer(T val) const;
+
+        template <class T>
+        std::enable_if_t<!std::is_lvalue_reference<CT>::value, std::add_pointer_t<T>>
+        get_pointer(T& val) const;
+
+        template <class T, class CTA>
+        std::enable_if_t<std::is_lvalue_reference<CT>::value, T>
+        get_storage_init(CTA&& e) const;
+
+        template <class T, class CTA>
+        std::enable_if_t<!std::is_lvalue_reference<CT>::value, T>
+        get_storage_init(CTA&& e) const;
+    };
+
+    // TODO: remove this (backward compatibility)
+    template <class CT>
+    using closure_wrapper = xclosure_wrapper<CT>;
+
+    /********************
+     * xclosure_pointer *
+     ********************/
+
+    // Pointer-like holder over a closure: stores the closure (value or
+    // reference, per closure_type_t) and exposes it through operator* and
+    // operator->. Typically used to implement operator-> of iterators that
+    // return proxies.
+    template <class CT>
+    class xclosure_pointer
+    {
+    public:
+
+        using self_type = xclosure_pointer<CT>;
+        using closure_type = CT;
+        using value_type = std::decay_t<CT>;
+
+        using reference = std::conditional_t<
+            std::is_const<std::remove_reference_t<CT>>::value,
+            const value_type&, value_type&
+        >;
+
+        using const_reference = const value_type&;
+
+        using pointer = std::conditional_t<
+            std::is_const<std::remove_reference_t<CT>>::value,
+            const value_type*, value_type*
+        >;
+
+        xclosure_pointer(value_type&& e);
+        xclosure_pointer(reference e);
+
+        reference operator*() noexcept;
+        const_reference operator*() const noexcept;
+        pointer operator->() const noexcept;
+
+    private:
+
+        // Reference for lvalue closures, owned value otherwise.
+        using storing_type = closure_type_t<CT>;
+        storing_type m_wrappee;
+    };
+
+    /***********************************
+     * xclosure_wrapper implementation *
+     ***********************************/
+
+    // Construct from an rvalue: the value is moved into the storage (or, for
+    // lvalue closures, get_storage_init takes its address — see below).
+    template <class CT>
+    inline xclosure_wrapper<CT>::xclosure_wrapper(value_type&& e)
+        : m_wrappee(get_storage_init<storing_type>(std::move(e)))
+    {
+    }
+
+    // Construct from an lvalue reference.
+    template <class CT>
+    inline xclosure_wrapper<CT>::xclosure_wrapper(reference e)
+        : m_wrappee(get_storage_init<storing_type>(e))
+    {
+    }
+
+    // Copy assignment assigns through to the wrapped object; it does not
+    // rebind the wrapper.
+    template <class CT>
+    inline auto xclosure_wrapper<CT>::operator=(const self_type& rhs) -> self_type&
+    {
+        deref(m_wrappee) = deref(rhs.m_wrappee);
+        return *this;
+    }
+
+    // Move assignment is implemented as a swap of the wrapped objects.
+    template <class CT>
+    inline auto xclosure_wrapper<CT>::operator=(self_type&& rhs) -> self_type&
+    {
+        swap(rhs);
+        return *this;
+    }
+
+    // Assign any value through to the wrappee.
+    template <class CT>
+    template <class T>
+    inline auto xclosure_wrapper<CT>::operator=(T&& t) -> self_type&
+    {
+        deref(m_wrappee) = std::forward<T>(t);
+        return *this;
+    }
+
+    template <class CT>
+    inline xclosure_wrapper<CT>::operator typename xclosure_wrapper<CT>::closure_type() noexcept
+    {
+        return deref(m_wrappee);
+    }
+
+    template <class CT>
+    inline xclosure_wrapper<CT>::operator typename xclosure_wrapper<CT>::const_closure_type() const noexcept
+    {
+        return deref(m_wrappee);
+    }
+
+    template <class CT>
+    inline auto xclosure_wrapper<CT>::get() & noexcept -> std::add_lvalue_reference_t<closure_type>
+    {
+        return deref(m_wrappee);
+    }
+
+    template <class CT>
+    inline auto xclosure_wrapper<CT>::get() const & noexcept -> std::add_lvalue_reference_t<std::add_const_t<closure_type>>
+    {
+        return deref(m_wrappee);
+    }
+
+    template <class CT>
+    inline auto xclosure_wrapper<CT>::get() && noexcept -> closure_type
+    {
+        return deref(m_wrappee);
+    }
+
+    // operator& yields the address of the wrapped object, not of the wrapper.
+    template <class CT>
+    inline auto xclosure_wrapper<CT>::operator&() noexcept -> pointer
+    {
+        return get_pointer(m_wrappee);
+    }
+
+    // Lvalue-closure case: storage is a pointer, dereference it.
+    template <class CT>
+    template <class T>
+    inline std::enable_if_t<std::is_lvalue_reference<CT>::value, std::add_lvalue_reference_t<std::remove_pointer_t<T>>>
+    xclosure_wrapper<CT>::deref(T val) const
+    {
+        return *val;
+    }
+
+    // Value-closure case: storage is the value itself.
+    template <class CT>
+    template <class T>
+    inline std::enable_if_t<!std::is_lvalue_reference<CT>::value, std::add_lvalue_reference_t<T>>
+    xclosure_wrapper<CT>::deref(T& val) const
+    {
+        return val;
+    }
+
+    // Lvalue-closure case: the stored pointer is already the address.
+    template <class CT>
+    template <class T>
+    inline std::enable_if_t<std::is_lvalue_reference<CT>::value, T>
+    xclosure_wrapper<CT>::get_pointer(T val) const
+    {
+        return val;
+    }
+
+    // Value-closure case: take the address of the stored value.
+    template <class CT>
+    template <class T>
+    inline std::enable_if_t<!std::is_lvalue_reference<CT>::value, std::add_pointer_t<T>>
+    xclosure_wrapper<CT>::get_pointer(T& val) const
+    {
+        return &val;
+    }
+
+    // Storage initialization: take the address for lvalue closures...
+    template <class CT>
+    template <class T, class CTA>
+    inline std::enable_if_t<std::is_lvalue_reference<CT>::value, T>
+    xclosure_wrapper<CT>::get_storage_init(CTA&& e) const
+    {
+        return &e;
+    }
+
+    // ... and forward the value for value closures.
+    template <class CT>
+    template <class T, class CTA>
+    inline std::enable_if_t<!std::is_lvalue_reference<CT>::value, T>
+    xclosure_wrapper<CT>::get_storage_init(CTA&& e) const
+    {
+        return e;
+    }
+
+    // Equality compares the wrapped objects, not the storage.
+    template <class CT>
+    inline bool xclosure_wrapper<CT>::equal(const self_type& rhs) const
+    {
+        return deref(m_wrappee) == rhs.deref(rhs.m_wrappee);
+    }
+
+    // Swaps the wrapped objects through ADL swap.
+    template <class CT>
+    inline void xclosure_wrapper<CT>::swap(self_type& rhs)
+    {
+        using std::swap;
+        swap(deref(m_wrappee), deref(rhs.m_wrappee));
+    }
+
+    template <class CT>
+    inline bool operator==(const xclosure_wrapper<CT>& lhs, const xclosure_wrapper<CT>& rhs)
+    {
+        return lhs.equal(rhs);
+    }
+
+    template <class CT>
+    inline bool operator!=(const xclosure_wrapper<CT>& lhs, const xclosure_wrapper<CT>& rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    template <class CT>
+    inline void swap(xclosure_wrapper<CT>& lhs, xclosure_wrapper<CT>& rhs)
+    {
+        lhs.swap(rhs);
+    }
+
+    /***********************************
+     * xclosure_pointer implementation *
+     ***********************************/
+
+    template <class CT>
+    inline xclosure_pointer<CT>::xclosure_pointer(value_type&& e)
+        : m_wrappee(std::move(e))
+    {
+    }
+
+    template <class CT>
+    inline xclosure_pointer<CT>::xclosure_pointer(reference e)
+        : m_wrappee(e)
+    {
+    }
+
+    template <class CT>
+    inline auto xclosure_pointer<CT>::operator*() noexcept -> reference
+    {
+        return m_wrappee;
+    }
+
+    template <class CT>
+    inline auto xclosure_pointer<CT>::operator*() const noexcept -> const_reference
+    {
+        return m_wrappee;
+    }
+
+    // const_cast only strips the constness added by this const member
+    // function; when CT itself is const, `pointer` is pointer-to-const and the
+    // cast is a no-op. NOTE(review): relies on the wrapped object not having
+    // been declared const at its origin — confirm against callers.
+    template <class CT>
+    inline auto xclosure_pointer<CT>::operator->() const noexcept -> pointer
+    {
+        return const_cast<pointer>(std::addressof(m_wrappee));
+    }
+
+    /*****************************
+     * closure and const_closure *
+     *****************************/
+
+    // Builds an xclosure_wrapper whose storage kind (value vs. pointer) is
+    // selected by closure_type_t from the value category of t.
+    template <class T>
+    inline decltype(auto) closure(T&& t)
+    {
+        using wrapper_type = xclosure_wrapper<closure_type_t<T>>;
+        return wrapper_type(std::forward<T>(t));
+    }
+
+    // Same as closure(), but lvalues are captured through a const reference.
+    template <class T>
+    inline decltype(auto) const_closure(T&& t)
+    {
+        using wrapper_type = xclosure_wrapper<const_closure_type_t<T>>;
+        return wrapper_type(std::forward<T>(t));
+    }
+
+    /*********************************************
+     * closure_pointer and const_closure_pointer *
+     *********************************************/
+
+    // Factory for xclosure_pointer: captures t per closure_type_t (lvalues by
+    // reference, rvalues by value).
+    template <class T>
+    inline auto closure_pointer(T&& t)
+    {
+        return xclosure_pointer<closure_type_t<T>>(std::forward<T>(t));
+    }
+
+    // Read-only variant: lvalues are captured through a const reference.
+    template <class T>
+    inline auto const_closure_pointer(T&& t)
+    {
+        return xclosure_pointer<const_closure_type_t<T>>(std::forward<T>(t));
+    }
+}
+
+#endif

+ 179 - 0
3rd/numpy/include/xtl/xcompare.hpp

@@ -0,0 +1,179 @@
+/***************************************************************************
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_COMPARE_HPP
+#define XTL_COMPARE_HPP
+
+#include <type_traits>
+
+namespace xtl
+{
+
+    /**
+     * @defgroup xtl_xcompare
+     *
+     * Compare the values of two integers t and u. Unlike builtin comparison operators,
+     * negative signed integers always compare less than (and not equal to) unsigned integers.
+     */
+
+    namespace detail
+    {
+        template <class T, class U>
+        struct same_signedness :
+           std::integral_constant<bool, std::is_signed<T>::value == std::is_signed<U>::value>
+        {
+        };
+
+        template <
+            class T,
+            class U,
+            std::enable_if_t<same_signedness<T, U>::value, bool> = true
+        >
+        constexpr bool cmp_equal_impl(T t, U u) noexcept
+        {
+            return t == u;
+        }
+
+        template <
+            class T,
+            class U,
+            std::enable_if_t<
+                !same_signedness<T, U>::value && std::is_signed<T>::value,
+                bool
+            > = true
+        >
+        constexpr bool cmp_equal_impl(T t, U u) noexcept
+        {
+            using UT = std::make_unsigned_t<T>;
+            return t < 0 ? false : static_cast<UT>(t) == u;
+        }
+
+        template <
+            class T,
+            class U,
+            std::enable_if_t<
+                !same_signedness<T, U>::value && !std::is_signed<T>::value,
+                bool
+            > = true
+        >
+        constexpr bool cmp_equal_impl(T t, U u) noexcept
+        {
+            using UU = std::make_unsigned_t<U>;
+            return u < 0 ? false : t == static_cast<UU>(u);
+        }
+    }
+
+    /**
+     * ``true`` if @p t is equal to @p u.
+     *
+     * @ingroup xtl_xcompare
+     */
+    template <class T, class U>
+    constexpr bool cmp_equal(T t, U u) noexcept
+    {
+        return detail::cmp_equal_impl(t, u);
+    }
+
+    /**
+     * ``true`` if @p t is not equal to @p u.
+     *
+     * @ingroup xtl_xcompare
+     */
+    template <class T, class U>
+    constexpr bool cmp_not_equal(T t, U u) noexcept
+    {
+        return !cmp_equal(t, u);
+    }
+
+    namespace detail
+    {
+        template <
+            class T,
+            class U,
+            std::enable_if_t<detail::same_signedness<T, U>::value, bool> = true
+        >
+        constexpr bool cmp_less_impl(T t, U u) noexcept
+        {
+            return t < u;
+        }
+
+        template <
+            class T,
+            class U,
+            std::enable_if_t<
+                !detail::same_signedness<T, U>::value && std::is_signed<T>::value,
+                bool
+            > = true
+        >
+        constexpr bool cmp_less_impl(T t, U u) noexcept
+        {
+            using UT = std::make_unsigned_t<T>;
+            return t < 0 ? true : static_cast<UT>(t) < u;
+        }
+
+        template <
+            class T,
+            class U,
+            std::enable_if_t<
+                !detail::same_signedness<T, U>::value && !std::is_signed<T>::value,
+                bool
+            > = true
+        >
+        constexpr bool cmp_less_impl(T t, U u) noexcept
+        {
+            using UU = std::make_unsigned_t<U>;
+            return u < 0 ? false : t < static_cast<UU>(u);
+        }
+    }
+
+    /**
+     * ``true`` if @p t is striclty less than @p u.
+     *
+     * @ingroup xtl_xcompare
+     */
+    template <class T, class U>
+    constexpr bool cmp_less(T t, U u) noexcept
+    {
+        return detail::cmp_less_impl(t, u);
+    }
+
+    /**
+     * ``true`` if @p t is striclty greater than @p u.
+     *
+     * @ingroup xtl_xcompare
+     */
+    template <class T, class U>
+    constexpr bool cmp_greater(T t, U u) noexcept
+    {
+        return cmp_less(u, t);
+    }
+
+    /**
+     * ``true`` if @p t is less or equal to @p u.
+     *
+     * @ingroup xtl_xcompare
+     */
+    template <class T, class U>
+    constexpr bool cmp_less_equal(T t, U u) noexcept
+    {
+        return !cmp_greater(t, u);
+    }
+
+    /**
+     * ``true`` if @p t is greater or equal to @p u.
+     *
+     * @ingroup xtl_xcompare
+     */
+    template <class T, class U>
+    constexpr bool cmp_greater_equal(T t, U u) noexcept
+    {
+        return !cmp_less(t, u);
+    }
+}
+
+#endif

+ 1361 - 0
3rd/numpy/include/xtl/xcomplex.hpp

@@ -0,0 +1,1361 @@
+/***************************************************************************
+* Copyright (c) Sylvain Corlay and Johan Mabille and Wolf Vollprecht       *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_COMPLEX_HPP
+#define XTL_COMPLEX_HPP
+
+#if !defined(_MSC_VER)
+#include <cmath>
+using std::copysign;
+#endif
+
+#include <complex>
+#include <cstddef>
+#include <limits>
+#include <type_traits>
+#include <utility>
+#include <sstream>
+#include <string>
+
+#ifdef __CLING__
+#include <nlohmann/json.hpp>
+#endif
+
+#include "xclosure.hpp"
+#include "xtl_config.hpp"
+#include "xtype_traits.hpp"
+
+namespace xtl
+{
+    template <class CTR, class CTI = CTR, bool ieee_compliant = false>
+    class xcomplex;
+
+    /**************
+     * is_complex *
+     **************/
+
+    namespace detail
+    {
+        template <class T>
+        struct is_complex : std::false_type
+        {
+        };
+
+        template <class T>
+        struct is_complex<std::complex<T>> : std::true_type
+        {
+        };
+    }
+
+    template <class T>
+    struct is_complex
+    {
+        static constexpr bool value = detail::is_complex<std::decay_t<T>>::value;
+    };
+
+    /***************
+     * is_xcomplex *
+     ***************/
+
+    namespace detail
+    {
+        template <class T>
+        struct is_xcomplex : std::false_type
+        {
+        };
+
+        template <class CTR, class CTI, bool B>
+        struct is_xcomplex<xcomplex<CTR, CTI, B>> : std::true_type
+        {
+        };
+    }
+
+    template <class T>
+    struct is_xcomplex
+    {
+        static constexpr bool value = detail::is_xcomplex<std::decay_t<T>>::value;
+    };
+
    /******************
     * is_gen_complex *
     ******************/

    // True for either std::complex or xtl::xcomplex (cv/ref ignored).
    template <class T>
    using is_gen_complex = disjunction<is_complex<std::decay_t<T>>, is_xcomplex<std::decay_t<T>>>;

    /****************************
     * enable / disable complex *
     ****************************/

    // SFINAE helpers: select an overload when E is (resp. is not) one of
    // the complex types above. R is the resulting type of the alias.
    template <class E, class R = void>
    using disable_xcomplex = std::enable_if_t<!is_gen_complex<E>::value, R>;

    template <class E, class R = void>
    using enable_xcomplex = std::enable_if_t<is_gen_complex<E>::value, R>;

    /*****************
     * enable_scalar *
     *****************/

    // SFINAE helper: select an overload for arithmetic (scalar) operands.
    template <class E, class R = void>
    using enable_scalar = std::enable_if_t<xtl::is_arithmetic<E>::value, R>;

    /*******************
     * common_xcomplex *
     *******************/

    // Result type of mixed-operand arithmetic: the common value type of
    // both operands, IEEE-compliant if either operand is.
    template <class CTR1, class CTI1, bool ieee1, class CTR2, class CTI2, bool ieee2>
    struct common_xcomplex
    {
        using type = xcomplex<std::common_type_t<CTR1, CTI1>, std::common_type_t<CTR2, CTI2>, ieee1 || ieee2>;
    };

    template <class CTR1, class CTI1, bool ieee1, class CTR2, class CTI2, bool ieee2>
    using common_xcomplex_t = typename common_xcomplex<CTR1, CTI1, ieee1, CTR2, CTI2, ieee2>::type;

    /**********************
     * temporary_xcomplex *
     **********************/

    // Value-semantic xcomplex with the closure types decayed; used as the
    // result type of operations on (possibly reference-holding) xcomplex.
    template <class CTR, class CTI, bool ieee>
    struct temporary_xcomplex
    {
        using type = xcomplex<std::decay_t<CTR>, std::decay_t<CTI>, ieee>;
    };

    template <class CTR, class CTI, bool ieee>
    using temporary_xcomplex_t = typename temporary_xcomplex<CTR, CTI, ieee>::type;
+
+    /************
+     * xcomplex *
+     ************/
+
    // Complex number whose real and imaginary parts are held through the
    // closure types CTR / CTI — plain values or (possibly const) references,
    // as produced by the closure(...) machinery in xclosure.hpp. When
    // ieee_compliant is true, operator*= and operator/= route through the
    // NaN/infinity-recovering algorithms (see detail::xcomplex_multiplier).
    template <class CTR, class CTI, bool ieee_compliant>
    class xcomplex
    {
    public:

        static_assert(std::is_same<std::decay_t<CTR>, std::decay_t<CTI>>::value,
                      "closure types must have the same value type");

        using value_type = std::common_type_t<CTR, CTI>;
        using self_type = xcomplex<CTR, CTI, ieee_compliant>;
        using temporary_type = temporary_xcomplex_t<CTR, CTI, ieee_compliant>;

        // Reference flavors for real(): when CTR is itself a reference,
        // rvalue access still yields an lvalue reference to the referee;
        // otherwise rvalue access returns by value.
        using real_reference = std::add_lvalue_reference_t<CTR>;
        using real_const_reference = std::add_lvalue_reference_t<std::add_const_t<CTR>>;
        using real_rvalue_reference = std::conditional_t<std::is_reference<CTR>::value, apply_cv_t<CTR, value_type>&, value_type>;
        using real_rvalue_const_reference = std::conditional_t<std::is_reference<CTR>::value, const value_type&, value_type>;

        // Same scheme for imag().
        using imag_reference = std::add_lvalue_reference_t<CTI>;
        using imag_const_reference = std::add_lvalue_reference_t<std::add_const_t<CTI>>;
        using imag_rvalue_reference = std::conditional_t<std::is_reference<CTI>::value, apply_cv_t<CTI, value_type>&, value_type>;
        using imag_rvalue_const_reference = std::conditional_t<std::is_reference<CTI>::value, const value_type&, value_type>;

        // Value-initializes both parts.
        constexpr xcomplex() noexcept
            : m_real(), m_imag()
        {
        }

        // Construction from a non-complex value: implicit when OCTR&& is
        // convertible to CTR...
        template <class OCTR,
            std::enable_if_t<
                conjunction<
                    negation<is_gen_complex<OCTR>>,
                    std::is_constructible<CTR, OCTR&&>,
                    std::is_convertible<OCTR&&, CTR>
                >::value,
                bool
            > = true>
        constexpr xcomplex(OCTR&& re) noexcept
            : m_real(std::forward<OCTR>(re)), m_imag()
        {
        }

        // ...and explicit when CTR is constructible but not convertible
        // from OCTR&&.
        template <class OCTR,
            std::enable_if_t<
                conjunction<
                    negation<is_gen_complex<OCTR>>,
                    std::is_constructible<CTR, OCTR&&>,
                    negation<std::is_convertible<OCTR&&, CTR>>
            >::value,
            bool
        > = true>
        explicit constexpr xcomplex(OCTR&& re) noexcept
            : m_real(std::forward<OCTR>(re)), m_imag()
        {
        }

        template <class OCTR, class OCTI>
        explicit constexpr xcomplex(OCTR&& re, OCTI&& im) noexcept
            : m_real(std::forward<OCTR>(re)), m_imag(std::forward<OCTI>(im))
        {
        }

        // Cross-closure-type copy/move construction goes through the
        // public accessors of the other specialization (whose privates are
        // not accessible from this one).
        template <class OCTR, class OCTI, bool OB>
        explicit constexpr xcomplex(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept
            : m_real(rhs.real()), m_imag(rhs.imag())
        {
        }

        template <class OCTR, class OCTI, bool OB>
        explicit constexpr xcomplex(xcomplex<OCTR, OCTI, OB>&& rhs) noexcept
            : m_real(std::move(rhs).real()), m_imag(std::move(rhs).imag())
        {
        }

        // Implicit interoperability with std::complex in both directions
        // (see also the conversion operator below).
        template <class T>
        constexpr xcomplex(const std::complex<T>& rhs) noexcept
            : m_real(rhs.real()), m_imag(rhs.imag())
        {
        }

        template <class T>
        constexpr xcomplex(std::complex<T>&& rhs) noexcept
            : m_real(std::move(rhs).real()), m_imag(std::move(rhs).imag())
        {
        }

        // Assigning a non-complex value sets the real part and resets the
        // imaginary part (definitions appear further below in this header).
        template <class OCTR>
        disable_xcomplex<OCTR, self_type&> operator=(OCTR&& rhs) noexcept;
        template <class OCTR, class OCTI, bool OB>
        self_type& operator=(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept;
        template <class OCTR, class OCTI, bool OB>
        self_type& operator=(xcomplex<OCTR, OCTI, OB>&& rhs) noexcept;

        operator std::complex<std::decay_t<CTR>>() const noexcept;

        // Compound assignment with another (possibly different) xcomplex.
        template <class OCTR, class OCTI, bool OB>
        self_type& operator+=(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept;
        template <class OCTR, class OCTI, bool OB>
        self_type& operator-=(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept;
        template <class OCTR, class OCTI, bool OB>
        self_type& operator*=(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept;
        template <class OCTR, class OCTI, bool OB>
        self_type& operator/=(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept;

        // Compound assignment with a non-complex (scalar) operand.
        template <class T>
        disable_xcomplex<T, self_type&> operator+=(const T& rhs) noexcept;
        template <class T>
        disable_xcomplex<T, self_type&> operator-=(const T& rhs) noexcept;
        template <class T>
        disable_xcomplex<T, self_type&> operator*=(const T& rhs) noexcept;
        template <class T>
        disable_xcomplex<T, self_type&> operator/=(const T& rhs) noexcept;

        // Ref-qualified accessors; see the *_reference aliases above for
        // the exact return types per value category.
        real_reference real() & noexcept;
        real_rvalue_reference real() && noexcept;
        constexpr real_const_reference real() const & noexcept;
        constexpr real_rvalue_const_reference real() const && noexcept;

        imag_reference imag() & noexcept;
        imag_rvalue_reference imag() && noexcept;
        constexpr imag_const_reference imag() const & noexcept;
        constexpr imag_rvalue_const_reference imag() const && noexcept;

        // operator& returns a closure pointer instead of a raw address —
        // presumably so a pointer taken on a temporary remains usable
        // (the rvalue overload stores the value); see xclosure_pointer.
        xclosure_pointer<self_type&> operator&() & noexcept;
        xclosure_pointer<const self_type&> operator&() const & noexcept;
        xclosure_pointer<self_type> operator&() && noexcept;

    private:

        CTR m_real;
        CTI m_imag;
    };
+
    /**********************
     * xcomplex operators *
     **********************/

    // Declarations only; the definitions appear further below in this
    // header. Mixed-operand arithmetic yields common_xcomplex_t, scalar
    // variants yield temporary_xcomplex_t.

    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
    bool operator==(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept;

    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
    bool operator!=(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept;

    template <class OC, class OT, class CTR, class CTI, bool B>
    std::basic_ostream<OC, OT>& operator<<(std::basic_ostream<OC, OT>& out, const xcomplex<CTR, CTI, B>& c) noexcept;

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    operator+(const xcomplex<CTR, CTI, B>& rhs) noexcept;

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    operator-(const xcomplex<CTR, CTI, B>& rhs) noexcept;

    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
    common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2>
    operator+(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept;

    template <class CTR, class CTI, bool B, class T>
    enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
    operator+(const xcomplex<CTR, CTI, B>& lhs, const T& rhs) noexcept;

    template <class CTR, class CTI, bool B, class T>
    enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
    operator+(const T& lhs, const xcomplex<CTR, CTI, B>& rhs) noexcept;

    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
    common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2>
    operator-(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept;

    template <class CTR, class CTI, bool B, class T>
    enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
    operator-(const xcomplex<CTR, CTI, B>& lhs, const T& rhs) noexcept;

    template <class CTR, class CTI, bool B, class T>
    enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
    operator-(const T& lhs, const xcomplex<CTR, CTI, B>& rhs) noexcept;

    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
    common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2>
    operator*(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept;

    template <class CTR, class CTI, bool B, class T>
    enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
    operator*(const xcomplex<CTR, CTI, B>& lhs, const T& rhs) noexcept;

    template <class CTR, class CTI, bool B, class T>
    enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
    operator*(const T& lhs, const xcomplex<CTR, CTI, B>& rhs) noexcept;

    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
    common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2>
    operator/(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept;

    template <class CTR, class CTI, bool B, class T>
    enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
    operator/(const xcomplex<CTR, CTI, B>& lhs, const T& rhs) noexcept;

    template <class CTR, class CTI, bool B, class T>
    enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
    operator/(const T& lhs, const xcomplex<CTR, CTI, B>& rhs) noexcept;

    /*****************
     * real and imag *
     *****************/

    // Generic accessors forwarding to the argument's real()/imag().
    template <class E>
    decltype(auto) real(E&& e) noexcept;

    template <class E>
    decltype(auto) imag(E&& e) noexcept;

    /***************************
     * xcomplex free functions *
     ***************************/

    template <class CTR, class CTI, bool B>
    typename xcomplex<CTR, CTI, B>::value_type
    abs(const xcomplex<CTR, CTI, B>& rhs);

    template <class CTR, class CTI, bool B>
    typename xcomplex<CTR, CTI, B>::value_type
    arg(const xcomplex<CTR, CTI, B>& rhs);

    template <class CTR, class CTI, bool B>
    typename xcomplex<CTR, CTI, B>::value_type
    norm(const xcomplex<CTR, CTI, B>& rhs);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    conj(const xcomplex<CTR, CTI, B>& rhs);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    proj(const xcomplex<CTR, CTI, B>& rhs);

    /**********************************
     * xcomplex exponential functions *
     **********************************/

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    exp(const xcomplex<CTR, CTI, B>& rhs);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    log(const xcomplex<CTR, CTI, B>& rhs);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    log10(const xcomplex<CTR, CTI, B>& rhs);

    /****************************
     * xcomplex power functions *
     ****************************/

    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
    common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2>
    pow(const xcomplex<CTR1, CTI1, B1>& x, const xcomplex<CTR2, CTI2, B2>& y);

    template <class CTR, class CTI, bool B, class T>
    enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
    pow(const xcomplex<CTR, CTI, B>& x, const T& y);

    template <class CTR, class CTI, bool B, class T>
    enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
    pow(const T& x, const xcomplex<CTR, CTI, B>& y);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    sqrt(const xcomplex<CTR, CTI, B>& x);

    /************************************
     * xcomplex trigonometric functions *
     ************************************/

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    sin(const xcomplex<CTR, CTI, B>& x);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    cos(const xcomplex<CTR, CTI, B>& x);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    tan(const xcomplex<CTR, CTI, B>& x);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    asin(const xcomplex<CTR, CTI, B>& x);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    acos(const xcomplex<CTR, CTI, B>& x);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    atan(const xcomplex<CTR, CTI, B>& x);

    /*********************************
     * xcomplex hyperbolic functions *
     *********************************/

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    sinh(const xcomplex<CTR, CTI, B>& x);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    cosh(const xcomplex<CTR, CTI, B>& x);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    tanh(const xcomplex<CTR, CTI, B>& x);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    asinh(const xcomplex<CTR, CTI, B>& x);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    acosh(const xcomplex<CTR, CTI, B>& x);

    template <class CTR, class CTI, bool B>
    temporary_xcomplex_t<CTR, CTI, B>
    atanh(const xcomplex<CTR, CTI, B>& x);
+
+    /***************************
+     * xcomplex implementation *
+     ***************************/
+
+    template <class CTR, class CTI, bool B>
+    template <class OCTR>
+    inline auto xcomplex<CTR, CTI, B>::operator=(OCTR&& rhs) noexcept -> disable_xcomplex<OCTR, self_type&>
+    {
+        m_real = std::forward<OCTR>(rhs);
+        m_imag = std::decay_t<CTI>();
+        return *this;
+    }
+
+    template <class CTR, class CTI, bool B>
+    template <class OCTR, class OCTI, bool OB>
+    inline auto xcomplex<CTR, CTI, B>::operator=(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept -> self_type&
+    {
+        m_real = rhs.m_real;
+        m_imag = rhs.m_imag;
+        return *this;
+    }
+
+    template <class CTR, class CTI, bool B>
+    template <class OCTR, class OCTI, bool OB>
+    inline auto xcomplex<CTR, CTI, B>::operator=(xcomplex<OCTR, OCTI, OB>&& rhs) noexcept -> self_type&
+    {
+        m_real = std::move(rhs.m_real);
+        m_imag = std::move(rhs.m_imag);
+        return *this;
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline xcomplex<CTR, CTI, B>::operator std::complex<std::decay_t<CTR>>() const noexcept
+    {
+        return std::complex<std::decay_t<CTR>>(m_real, m_imag);
+    }
+
+    template <class CTR, class CTI, bool B>
+    template <class OCTR, class OCTI, bool OB>
+    inline auto xcomplex<CTR, CTI, B>::operator+=(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept -> self_type&
+    {
+        m_real += rhs.m_real;
+        m_imag += rhs.m_imag;
+        return *this;
+    }
+
+    template <class CTR, class CTI, bool B>
+    template <class OCTR, class OCTI, bool OB>
+    inline auto xcomplex<CTR, CTI, B>::operator-=(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept -> self_type&
+    {
+        m_real -= rhs.m_real;
+        m_imag -= rhs.m_imag;
+        return *this;
+    }
+
+    namespace detail
+    {
+        template <bool ieee_compliant>
+        struct xcomplex_multiplier
+        {
+            template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+            static auto mul(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs)
+            {
+                using return_type = temporary_xcomplex_t<CTR1, CTI1, B1>;
+                using value_type = typename return_type::value_type;
+                value_type a = lhs.real();
+                value_type b = lhs.imag();
+                value_type c = rhs.real();
+                value_type d = rhs.imag();
+                return return_type(a*c - b*d, a*d + b*c);
+            }
+
+            template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+            static auto div(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs)
+            {
+                using return_type = temporary_xcomplex_t<CTR1, CTI1, B1>;
+                using value_type = typename return_type::value_type;
+                value_type a = lhs.real();
+                value_type b = lhs.imag();
+                value_type c = rhs.real();
+                value_type d = rhs.imag();
+                value_type e = c*c + d*d;
+                return return_type((c*a + d*b) / e, (c*b - d*a) / e);
+            }
+        };
+
+        template <>
+        struct xcomplex_multiplier<true>
+        {
+            template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+            static auto mul(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs)
+            {
+                using return_type = temporary_xcomplex_t<CTR1, CTI1, B1>;
+                using value_type = typename return_type::value_type;
+                value_type a = lhs.real();
+                value_type b = lhs.imag();
+                value_type c = rhs.real();
+                value_type d = rhs.imag();
+                value_type ac = a * c;
+                value_type bd = b * d;
+                value_type ad = a * d;
+                value_type bc = b * c;
+                value_type x = ac - bd;
+                value_type y = ad + bc;
+                if (std::isnan(x) && std::isnan(y))
+                {
+                    bool recalc = false;
+                    if (std::isinf(a) || std::isinf(b))
+                    {
+                        a = copysign(std::isinf(a) ? value_type(1) : value_type(0), a);
+                        b = copysign(std::isinf(b) ? value_type(1) : value_type(0), b);
+                        if (std::isnan(c))
+                        {
+                            c = copysign(value_type(0), c);
+                        }
+                        if (std::isnan(d))
+                        {
+                            d = copysign(value_type(0), d);
+                        }
+                        recalc = true;
+                    }
+                    if (std::isinf(c) || std::isinf(d))
+                    {
+                        c = copysign(std::isinf(c) ? value_type(1) : value_type(0), c);
+                        d = copysign(std::isinf(c) ? value_type(1) : value_type(0), d);
+                        if (std::isnan(a))
+                        {
+                            a = copysign(value_type(0), a);
+                        }
+                        if (std::isnan(b))
+                        {
+                            b = copysign(value_type(0), b);
+                        }
+                        recalc = true;
+                    }
+                    if (!recalc && (std::isinf(ac) || std::isinf(bd) || std::isinf(ad) || std::isinf(bc)))
+                    {
+                        if (std::isnan(a))
+                        {
+                            a = copysign(value_type(0), a);
+                        }
+                        if (std::isnan(b))
+                        {
+                            b = copysign(value_type(0), b);
+                        }
+                        if (std::isnan(c))
+                        {
+                            c = copysign(value_type(0), c);
+                        }
+                        if (std::isnan(d))
+                        {
+                            d = copysign(value_type(0), d);
+                        }
+                        recalc = true;
+                    }
+                    if (recalc)
+                    {
+                        x = std::numeric_limits<value_type>::infinity() * (a * c - b * d);
+                        y = std::numeric_limits<value_type>::infinity() * (a * d + b * c);
+                    }
+                }
+                return return_type(x, y);
+            }
+
+            template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+            static auto div(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs)
+            {
+                using return_type = temporary_xcomplex_t<CTR1, CTI1, B1>;
+                using value_type = typename return_type::value_type;
+                value_type a = lhs.real();
+                value_type b = lhs.imag();
+                value_type c = rhs.real();
+                value_type d = rhs.imag();
+                value_type logbw = std::logb(std::fmax(std::fabs(c), std::fabs(d)));
+                int ilogbw = 0;
+                if (std::isfinite(logbw))
+                {
+                    ilogbw = static_cast<int>(logbw);
+                    c = std::scalbn(c, -ilogbw);
+                    d = std::scalbn(d, -ilogbw);
+                }
+                value_type denom = c*c + d*d;
+                value_type x = std::scalbn((a*c + b*d) / denom, -ilogbw);
+                value_type y = std::scalbn((b*c - a*d) / denom, -ilogbw);
+                if (std::isnan(x) && std::isnan(y))
+                {
+                    if ((denom == value_type(0)) && (!std::isnan(a) || !std::isnan(b)))
+                    {
+                        x = copysign(std::numeric_limits<value_type>::infinity(), c) * a;
+                        y = copysign(std::numeric_limits<value_type>::infinity(), c) * b;
+                    }
+                    else if ((std::isinf(a) || std::isinf(b)) && std::isfinite(c) && std::isfinite(d))
+                    {
+                        a = copysign(std::isinf(a) ? value_type(1) : value_type(0), a);
+                        b = copysign(std::isinf(b) ? value_type(1) : value_type(0), b);
+                        x = std::numeric_limits<value_type>::infinity() * (a*c + b*d);
+                        y = std::numeric_limits<value_type>::infinity() * (b*c - a*d);
+                    }
+                    else if (std::isinf(logbw) && logbw > value_type(0) && std::isfinite(a) && std::isfinite(b))
+                    {
+                        c = copysign(std::isinf(c) ? value_type(1) : value_type(0), c);
+                        d = copysign(std::isinf(d) ? value_type(1) : value_type(0), d);
+                        x = value_type(0) * (a*c + b*d);
+                        y = value_type(0) * (b*c - a*d);
+                    }
+                }
+                return std::complex<value_type>(x, y);
+            }
+        };
+    }
+
    // Multiplication delegates to xcomplex_multiplier; the IEEE-compliant
    // implementation is selected if either operand requires it (B || OB).
    template <class CTR, class CTI, bool B>
    template <class OCTR, class OCTI, bool OB>
    inline auto xcomplex<CTR, CTI, B>::operator*=(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept -> self_type&
    {
        *this = detail::xcomplex_multiplier<B || OB>::mul(*this, rhs);
        return *this;
    }

    // Division likewise selects the IEEE-compliant algorithm if either
    // operand requires it.
    template <class CTR, class CTI, bool B>
    template <class OCTR, class OCTI, bool OB>
    inline auto xcomplex<CTR, CTI, B>::operator/=(const xcomplex<OCTR, OCTI, OB>& rhs) noexcept -> self_type&
    {
        *this = detail::xcomplex_multiplier<B || OB>::div(*this, rhs);
        return *this;
    }
+
    // Scalar compound assignment: adding or subtracting a real scalar
    // affects only the real part.
    template <class CTR, class CTI, bool B>
    template <class T>
    inline auto xcomplex<CTR, CTI, B>::operator+=(const T& rhs) noexcept -> disable_xcomplex<T, self_type&>
    {
        m_real += rhs;
        return *this;
    }

    template <class CTR, class CTI, bool B>
    template <class T>
    inline auto xcomplex<CTR, CTI, B>::operator-=(const T& rhs) noexcept -> disable_xcomplex<T, self_type&>
    {
        m_real -= rhs;
        return *this;
    }

    // Multiplying or dividing by a real scalar scales both parts.
    template <class CTR, class CTI, bool B>
    template <class T>
    inline auto xcomplex<CTR, CTI, B>::operator*=(const T& rhs) noexcept -> disable_xcomplex<T, self_type&>
    {
        m_real *= rhs;
        m_imag *= rhs;
        return *this;
    }

    template <class CTR, class CTI, bool B>
    template <class T>
    inline auto xcomplex<CTR, CTI, B>::operator/=(const T& rhs) noexcept -> disable_xcomplex<T, self_type&>
    {
        m_real /= rhs;
        m_imag /= rhs;
        return *this;
    }
+
+    // Accessors for the stored real and imaginary parts. Each comes in four
+    // ref-qualified overloads (lvalue, rvalue, const lvalue, const rvalue)
+    // whose return types are the closure-dependent reference aliases declared
+    // by the class, so the correct value/reference semantics are preserved
+    // whether xcomplex holds values or references.
+    template <class CTR, class CTI, bool B>
+    auto xcomplex<CTR, CTI, B>::real() & noexcept -> real_reference
+    {
+        return m_real;
+    }
+
+    template <class CTR, class CTI, bool B>
+    auto xcomplex<CTR, CTI, B>::real() && noexcept -> real_rvalue_reference
+    {
+        return m_real;
+    }
+
+    template <class CTR, class CTI, bool B>
+    constexpr auto xcomplex<CTR, CTI, B>::real() const & noexcept -> real_const_reference
+    {
+        return m_real;
+    }
+
+    template <class CTR, class CTI, bool B>
+    constexpr auto xcomplex<CTR, CTI, B>::real() const && noexcept -> real_rvalue_const_reference
+    {
+        return m_real;
+    }
+
+    template <class CTR, class CTI, bool B>
+    auto xcomplex<CTR, CTI, B>::imag() & noexcept -> imag_reference
+    {
+        return m_imag;
+    }
+
+    template <class CTR, class CTI, bool B>
+    auto xcomplex<CTR, CTI, B>::imag() && noexcept -> imag_rvalue_reference
+    {
+        return m_imag;
+    }
+
+    template <class CTR, class CTI, bool B>
+    constexpr auto xcomplex<CTR, CTI, B>::imag() const & noexcept -> imag_const_reference
+    {
+        return m_imag;
+    }
+
+    template <class CTR, class CTI, bool B>
+    constexpr auto xcomplex<CTR, CTI, B>::imag() const && noexcept -> imag_rvalue_const_reference
+    {
+        return m_imag;
+    }
+
+    // Address-of overloads returning an xclosure_pointer that wraps *this
+    // (by reference for lvalues, by move for rvalues). NOTE(review): this
+    // appears intended to let proxy objects support pointer-like access
+    // (e.g. iterator operator->) — confirm against xclosure_pointer's docs.
+    template <class CTR, class CTI, bool B>
+    inline auto xcomplex<CTR, CTI, B>::operator&() & noexcept -> xclosure_pointer<self_type&>
+    {
+        return xclosure_pointer<self_type&>(*this);
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline auto xcomplex<CTR, CTI, B>::operator&() const & noexcept -> xclosure_pointer<const self_type&>
+    {
+        return xclosure_pointer<const self_type&>(*this);
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline auto xcomplex<CTR, CTI, B>::operator&() && noexcept -> xclosure_pointer<self_type>
+    {
+        return xclosure_pointer<self_type>(std::move(*this));
+    }
+
+    /*************************************
+     * xcomplex operators implementation *
+     *************************************/
+
+    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+    inline bool operator==(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept
+    {
+        return lhs.real() == rhs.real() && lhs.imag() == rhs.imag();
+    }
+
+    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+    inline bool operator!=(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept
+    {
+        return !(lhs == rhs);
+    }
+
+    template <class OC, class OT, class CTR, class CTI, bool B>
+    inline std::basic_ostream<OC, OT>& operator<<(std::basic_ostream<OC, OT>& out, const xcomplex<CTR, CTI, B>& c) noexcept
+    {
+        out << "(" << c.real() << "," << c.imag() << ")";
+        return out;
+    }
+
+#ifdef __CLING__
+    // Rich display hook for the cling/xeus-cling Jupyter kernel: renders the
+    // value through operator<< and publishes it as a text/plain MIME bundle.
+    template <class CTR, class CTI, bool B>
+    nlohmann::json mime_bundle_repr(const xcomplex<CTR, CTI, B>& c)
+    {
+        auto bundle = nlohmann::json::object();
+        std::stringstream tmp;
+        tmp << c;
+        bundle["text/plain"] = tmp.str();
+        return bundle;
+    }
+#endif
+
+    // Unary plus: returns a value-semantic copy (temporary_xcomplex_t).
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    operator+(const xcomplex<CTR, CTI, B>& rhs) noexcept
+    {
+        return rhs;
+    }
+
+    // Unary minus: negates both components.
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    operator-(const xcomplex<CTR, CTI, B>& rhs) noexcept
+    {
+        return temporary_xcomplex_t<CTR, CTI, B>(-rhs.real(), -rhs.imag());
+    }
+
+    // Binary addition: copy-then-compound pattern; the xcomplex/xcomplex
+    // overload promotes to the common value type, and the two scalar
+    // overloads are constrained by enable_scalar.
+    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+    inline common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2>
+    operator+(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept
+    {
+        common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2> res(lhs);
+        res += rhs;
+        return res;
+    }
+
+    template <class CTR, class CTI, bool B, class T>
+    inline enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
+    operator+(const xcomplex<CTR, CTI, B>& lhs, const T& rhs) noexcept
+    {
+        temporary_xcomplex_t<CTR, CTI, B> res(lhs);
+        res += rhs;
+        return res;
+    }
+
+    template <class CTR, class CTI, bool B, class T>
+    inline enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
+    operator+(const T& lhs, const xcomplex<CTR, CTI, B>& rhs) noexcept
+    {
+        temporary_xcomplex_t<CTR, CTI, B> res(lhs);
+        res += rhs;
+        return res;
+    }
+
+    // Binary subtraction: same copy-then-compound pattern as operator+.
+    // Note the scalar-lhs overload builds res from lhs, then subtracts rhs.
+    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+    inline common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2>
+    operator-(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept
+    {
+        common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2> res(lhs);
+        res -= rhs;
+        return res;
+    }
+
+    template <class CTR, class CTI, bool B, class T>
+    inline enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
+    operator-(const xcomplex<CTR, CTI, B>& lhs, const T& rhs) noexcept
+    {
+        temporary_xcomplex_t<CTR, CTI, B> res(lhs);
+        res -= rhs;
+        return res;
+    }
+
+    template <class CTR, class CTI, bool B, class T>
+    inline enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
+    operator-(const T& lhs, const xcomplex<CTR, CTI, B>& rhs) noexcept
+    {
+        temporary_xcomplex_t<CTR, CTI, B> res(lhs);
+        res -= rhs;
+        return res;
+    }
+
+    // Binary multiplication: copies then forwards to operator*= (which
+    // dispatches on the IEEE-compliance flag for complex rhs).
+    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+    inline common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2>
+    operator*(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept
+    {
+        common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2> res(lhs);
+        res *= rhs;
+        return res;
+    }
+
+    template <class CTR, class CTI, bool B, class T>
+    inline enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
+    operator*(const xcomplex<CTR, CTI, B>& lhs, const T& rhs) noexcept
+    {
+        temporary_xcomplex_t<CTR, CTI, B> res(lhs);
+        res *= rhs;
+        return res;
+    }
+
+    template <class CTR, class CTI, bool B, class T>
+    inline enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
+    operator*(const T& lhs, const xcomplex<CTR, CTI, B>& rhs) noexcept
+    {
+        temporary_xcomplex_t<CTR, CTI, B> res(lhs);
+        res *= rhs;
+        return res;
+    }
+
+    // Binary division: copies then forwards to operator/=. For the
+    // scalar-lhs overload, res is constructed from the scalar (real part
+    // only) and then divided by the complex rhs.
+    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+    inline common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2>
+    operator/(const xcomplex<CTR1, CTI1, B1>& lhs, const xcomplex<CTR2, CTI2, B2>& rhs) noexcept
+    {
+        common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2> res(lhs);
+        res /= rhs;
+        return res;
+    }
+
+    template <class CTR, class CTI, bool B, class T>
+    inline enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
+    operator/(const xcomplex<CTR, CTI, B>& lhs, const T& rhs) noexcept
+    {
+        temporary_xcomplex_t<CTR, CTI, B> res(lhs);
+        res /= rhs;
+        return res;
+    }
+
+    template <class CTR, class CTI, bool B, class T>
+    inline enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
+    operator/(const T& lhs, const xcomplex<CTR, CTI, B>& rhs) noexcept
+    {
+        temporary_xcomplex_t<CTR, CTI, B> res(lhs);
+        res /= rhs;
+        return res;
+    }
+
+    /***************************
+     * xcomplex free functions *
+     ***************************/
+
+    template <class CTR, class CTI, bool B>
+    inline typename xcomplex<CTR, CTI, B>::value_type
+    abs(const xcomplex<CTR, CTI, B>& rhs)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::abs(std::complex<value_type>(rhs));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline typename xcomplex<CTR, CTI, B>::value_type
+    arg(const xcomplex<CTR, CTI, B>& rhs)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::arg(std::complex<value_type>(rhs));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline typename xcomplex<CTR, CTI, B>::value_type
+    norm(const xcomplex<CTR, CTI, B>& rhs)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::norm(std::complex<value_type>(rhs));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    conj(const xcomplex<CTR, CTI, B>& rhs)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::conj(std::complex<value_type>(rhs));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    proj(const xcomplex<CTR, CTI, B>& rhs)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::proj(std::complex<value_type>(rhs));
+    }
+
+    /*************************************************
+     * xcomplex exponential functions implementation *
+     *************************************************/
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    exp(const xcomplex<CTR, CTI, B>& rhs)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::exp(std::complex<value_type>(rhs));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    log(const xcomplex<CTR, CTI, B>& rhs)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::log(std::complex<value_type>(rhs));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    log10(const xcomplex<CTR, CTI, B>& rhs)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::log10(std::complex<value_type>(rhs));
+    }
+
+    /*******************************************
+     * xcomplex power functions implementation *
+     *******************************************/
+
+    // Power functions: convert operands to std::complex and delegate to
+    // std::pow / std::sqrt. The mixed complex/scalar overloads are
+    // constrained by enable_scalar, mirroring the arithmetic operators.
+    template <class CTR1, class CTI1, bool B1, class CTR2, class CTI2, bool B2>
+    inline common_xcomplex_t<CTR1, CTI1, B1, CTR2, CTI2, B2>
+    pow(const xcomplex<CTR1, CTI1, B1>& x, const xcomplex<CTR2, CTI2, B2>& y)
+    {
+        using value_type1 = typename xcomplex<CTR1, CTI1, B1>::value_type;
+        using value_type2 = typename xcomplex<CTR2, CTI2, B2>::value_type;
+        return std::pow(std::complex<value_type1>(x), std::complex<value_type2>(y));
+    }
+
+    template <class CTR, class CTI, bool B, class T>
+    inline enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
+    pow(const xcomplex<CTR, CTI, B>& x, const T& y)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::pow(std::complex<value_type>(x), y);
+    }
+
+    template <class CTR, class CTI, bool B, class T>
+    inline enable_scalar<T, temporary_xcomplex_t<CTR, CTI, B>>
+    pow(const T& x, const xcomplex<CTR, CTI, B>& y)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::pow(x, std::complex<value_type>(y));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    sqrt(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename xcomplex<CTR, CTI, B>::value_type;
+        return std::sqrt(std::complex<value_type>(x));
+    }
+
+    /***************************************************
+     * xcomplex trigonometric functions implementation *
+     ***************************************************/
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    sin(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::sin(std::complex<value_type>(x));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    cos(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::cos(std::complex<value_type>(x));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    tan(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::tan(std::complex<value_type>(x));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    asin(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::asin(std::complex<value_type>(x));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    acos(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::acos(std::complex<value_type>(x));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    atan(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::atan(std::complex<value_type>(x));
+    }
+
+    /************************************************
+     * xcomplex hyperbolic functions implementation *
+     ************************************************/
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    sinh(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::sinh(std::complex<value_type>(x));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    cosh(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::cosh(std::complex<value_type>(x));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    tanh(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::tanh(std::complex<value_type>(x));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    asinh(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::asinh(std::complex<value_type>(x));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    acosh(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::acosh(std::complex<value_type>(x));
+    }
+
+    template <class CTR, class CTI, bool B>
+    inline temporary_xcomplex_t<CTR, CTI, B>
+    atanh(const xcomplex<CTR, CTI, B>& x)
+    {
+        using value_type = typename temporary_xcomplex_t<CTR, CTI, B>::value_type;
+        return std::atanh(std::complex<value_type>(x));
+    }
+
+    /*********************************
+     * forward_offset implementation *
+     *********************************/
+
+    namespace detail
+    {
+        // forward_type<T, M>: applies T's cv-qualifiers to member type M,
+        // preserving lvalue-reference-ness of T on the result.
+        template <class T, class M>
+        struct forward_type
+        {
+            using type = apply_cv_t<T, M>;
+        };
+
+        template <class T, class M>
+        struct forward_type<T&, M>
+        {
+            using type = apply_cv_t<T, M>&;
+        };
+
+        template <class T, class M>
+        using forward_type_t = typename forward_type<T, M>::type;
+    }
+
+    // Returns a reference to the object of type M located I bytes inside v,
+    // with v's cv/ref qualifiers carried over. Implemented by casting v's
+    // address to a byte pointer, offsetting by I, and reinterpreting as M*.
+    // NOTE(review): only safe when an M really lives at that offset (used
+    // below to alias the parts of std::complex, whose layout the standard
+    // guarantees to be array-like) — confirm for any other use.
+    template <class M, std::size_t I, class T>
+    constexpr detail::forward_type_t<T, M> forward_offset(T&& v) noexcept
+    {
+        using forward_type = detail::forward_type_t<T, M>;
+        using cv_value_type = std::remove_reference_t<forward_type>;
+        using byte_type = apply_cv_t<std::remove_reference_t<T>, char>;
+
+        return static_cast<forward_type>(
+            *reinterpret_cast<cv_value_type*>(
+                reinterpret_cast<byte_type*>(&v) + I
+            )
+        );
+    }
+
+    /**********************************************
+     * forward_real & forward_imag implementation *
+     **********************************************/
+
+    // forward_real
+
+    // forward_real: three mutually-exclusive overloads selected by type.
+    //  - non-complex T: forward the value itself (a real number is its own
+    //    real part);
+    //  - std::complex: alias the real part in-place via forward_offset<.., 0>,
+    //    preserving lvalue-ness so it can be written through;
+    //  - xcomplex: forward to its real() accessor.
+    template <class T>
+    auto forward_real(T&& v)
+        -> std::enable_if_t<!is_gen_complex<T>::value, detail::forward_type_t<T, T>>  // real case -> forward
+    {
+        return static_cast<detail::forward_type_t<T, T>>(v);
+    }
+
+    template <class T>
+    auto forward_real(T&& v)
+        -> std::enable_if_t<is_complex<T>::value, detail::forward_type_t<T, typename std::decay_t<T>::value_type>>  // complex case -> forward the real part
+    {
+        return forward_offset<typename std::decay_t<T>::value_type, 0>(v);
+    }
+
+    template <class T>
+    auto forward_real(T&& v)
+        -> std::enable_if_t<is_xcomplex<T>::value, decltype(std::forward<T>(v).real())>
+    {
+        return std::forward<T>(v).real();
+    }
+
+    // forward_imag: same dispatch. The non-complex overload returns 0 by
+    // value (a real number has no imaginary part, so nothing to alias); the
+    // std::complex overload aliases the part at offset sizeof(real_type).
+    template <class T>
+    auto forward_imag(T &&)
+        -> std::enable_if_t<!is_gen_complex<T>::value, std::decay_t<T>>  // real case -> always return 0 by value
+    {
+        return 0;
+    }
+
+    template <class T>
+    auto forward_imag(T&& v)
+        -> std::enable_if_t<is_complex<T>::value, detail::forward_type_t<T, typename std::decay_t<T>::value_type>>  // complex case -> forwards the imaginary part
+    {
+        using real_type = typename std::decay_t<T>::value_type;
+        return forward_offset<real_type, sizeof(real_type)>(v);
+    }
+
+    template <class T>
+    auto forward_imag(T&& v)
+        -> std::enable_if_t<is_xcomplex<T>::value, decltype(std::forward<T>(v).imag())>
+    {
+        return std::forward<T>(v).imag();
+    }
+
+    /******************************
+     * real & imag implementation *
+     ******************************/
+
+    // Public entry points: perfect-forward to the forward_real / forward_imag
+    // overload sets above.
+    template <class E>
+    inline decltype(auto) real(E&& e) noexcept
+    {
+        return forward_real(std::forward<E>(e));
+    }
+
+    template <class E>
+    inline decltype(auto) imag(E&& e) noexcept
+    {
+        return forward_imag(std::forward<E>(e));
+    }
+
+    /**********************
+     * complex_value_type *
+     **********************/
+
+    // complex_value_type<T>: unwraps std::complex<T> to T; leaves plain types
+    // unchanged. Note the xcomplex specialization deliberately yields the
+    // xcomplex type itself, not its scalar value_type.
+    template <class T>
+    struct complex_value_type
+    {
+        using type = T;
+    };
+
+    template <class T>
+    struct complex_value_type<std::complex<T>>
+    {
+        using type = T;
+    };
+
+    template <class CTR, class CTI, bool B>
+    struct complex_value_type<xcomplex<CTR, CTI, B>>
+    {
+        using type = xcomplex<CTR, CTI, B>;
+    };
+
+    template <class T>
+    using complex_value_type_t = typename complex_value_type<T>::type;
+
+    /******************************************************
+     * operator overloads for complex and closure wrapper *
+     *****************************************************/
+
+    // Mixed std::complex<C> / scalar T operators, declared in namespace xtl.
+    // Each overload is disabled when T is itself complex, and follows the
+    // copy-then-compound pattern. NOTE(review): unlike the std:: operators,
+    // these accept a scalar T that differs from C (converted via the
+    // compound operator) — presumably to support closure-wrapper operands;
+    // confirm against the callers.
+    template <class C, class T, std::enable_if_t<!xtl::is_complex<T>::value, int> = 0>
+    std::complex<C> operator+(const std::complex<C>& c, const T& t)
+    {
+        std::complex<C> result(c);
+        result += t;
+        return result;
+    }
+
+    template <class C, class T, std::enable_if_t<!xtl::is_complex<T>::value, int> = 0>
+    std::complex<C> operator+(const T& t, const std::complex<C>& c)
+    {
+        std::complex<C> result(t);
+        result += c;
+        return result;
+    }
+
+    template <class C, class T, std::enable_if_t<!xtl::is_complex<T>::value, int> = 0>
+    std::complex<C> operator-(const std::complex<C>& c, const T& t)
+    {
+        std::complex<C> result(c);
+        result -= t;
+        return result;
+    }
+
+    template <class C, class T, std::enable_if_t<!xtl::is_complex<T>::value, int> = 0>
+    std::complex<C> operator-(const T& t, const std::complex<C>& c)
+    {
+        std::complex<C> result(t);
+        result -= c;
+        return result;
+    }
+
+    template <class C, class T, std::enable_if_t<!xtl::is_complex<T>::value, int> = 0>
+    std::complex<C> operator*(const std::complex<C>& c, const T& t)
+    {
+        std::complex<C> result(c);
+        result *= t;
+        return result;
+    }
+
+    template <class C, class T, std::enable_if_t<!xtl::is_complex<T>::value, int> = 0>
+    std::complex<C> operator*(const T& t, const std::complex<C>& c)
+    {
+        std::complex<C> result(t);
+        result *= c;
+        return result;
+    }
+
+    template <class C, class T, std::enable_if_t<!xtl::is_complex<T>::value, int> = 0>
+    std::complex<C> operator/(const std::complex<C>& c, const T& t)
+    {
+        std::complex<C> result(c);
+        result /= t;
+        return result;
+    }
+
+    template <class C, class T, std::enable_if_t<!xtl::is_complex<T>::value, int> = 0>
+    std::complex<C> operator/(const T& t, const std::complex<C>& c)
+    {
+        std::complex<C> result(t);
+        result /= c;
+        return result;
+    }
+}
+
+#endif

+ 578 - 0
3rd/numpy/include/xtl/xcomplex_sequence.hpp

@@ -0,0 +1,578 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XCOMPLEX_SEQUENCE_HPP
+#define XTL_XCOMPLEX_SEQUENCE_HPP
+
+#include <array>
+#include <vector>
+#include <algorithm>
+
+#include "xclosure.hpp"
+#include "xcomplex.hpp"
+#include "xiterator_base.hpp"
+#include "xsequence.hpp"
+
+namespace xtl
+{
+    /************************************
+     * Optimized 1-D xcomplex container *
+     ************************************/
+
+    template <class IT, bool ieee_compliant>
+    class xcomplex_iterator;
+
+    template <class IT, bool ieee_compliant>
+    class xcomplex_iterator;
+
+    // CRTP-less base for complex-valued sequences stored structure-of-arrays
+    // style: two parallel containers (m_real, m_imag) instead of one
+    // container of complex values. Element access therefore returns proxy
+    // xcomplex objects holding references into both containers rather than
+    // true references, and pointers are xclosure_pointer wrappers.
+    // Constructors are protected: this class is only usable through derived
+    // types (xcomplex_array, xcomplex_vector below).
+    template <class C, bool ieee_compliant>
+    class xcomplex_sequence
+    {
+    public:
+
+        using container_type = C;
+        using cvt = typename C::value_type;
+
+        // value_type is a value-semantic xcomplex; reference types are
+        // xcomplex over (const) references into the two containers.
+        using value_type = xcomplex<cvt, cvt, ieee_compliant>;
+        using reference = xcomplex<cvt&, cvt&, ieee_compliant>;
+        using const_reference = xcomplex<const cvt&, const cvt&, ieee_compliant>;
+        using pointer = xclosure_pointer<reference>;
+        using const_pointer = xclosure_pointer<const_reference>;
+        using size_type = typename container_type::size_type;
+        using difference_type = typename container_type::difference_type;
+
+        using iterator = xcomplex_iterator<typename C::iterator, ieee_compliant>;
+        using const_iterator = xcomplex_iterator<typename C::const_iterator, ieee_compliant>;
+        using reverse_iterator = xcomplex_iterator<typename C::reverse_iterator, ieee_compliant>;
+        using const_reverse_iterator = xcomplex_iterator<typename C::const_reverse_iterator, ieee_compliant>;
+
+        bool empty() const noexcept;
+        size_type size() const noexcept;
+        size_type max_size() const noexcept;
+
+        reference at(size_type i);
+        const_reference at(size_type i) const;
+
+        reference operator[](size_type i);
+        const_reference operator[](size_type i) const;
+
+        reference front();
+        const_reference front() const;
+
+        reference back();
+        const_reference back() const;
+
+        iterator begin() noexcept;
+        iterator end() noexcept;
+
+        const_iterator begin() const noexcept;
+        const_iterator end() const noexcept;
+        const_iterator cbegin() const noexcept;
+        const_iterator cend() const noexcept;
+
+        reverse_iterator rbegin() noexcept;
+        reverse_iterator rend() noexcept;
+
+        const_reverse_iterator rbegin() const noexcept;
+        const_reverse_iterator rend() const noexcept;
+        const_reverse_iterator crbegin() const noexcept;
+        const_reverse_iterator crend() const noexcept;
+
+        // Direct access to the underlying part containers (rvalue overloads
+        // return by value).
+        container_type real() && noexcept;
+        container_type& real() & noexcept;
+        const container_type& real() const & noexcept;
+
+        container_type imag() && noexcept;
+        container_type& imag() & noexcept;
+        const container_type& imag() const & noexcept;
+
+    protected:
+
+        xcomplex_sequence() = default;
+        xcomplex_sequence(size_type s);
+        xcomplex_sequence(size_type s, const value_type& v);
+        template <class TR, class TC, bool B>
+        xcomplex_sequence(size_type s, const xcomplex<TR, TC, B>& v);
+        xcomplex_sequence(std::initializer_list<value_type> init);
+
+        ~xcomplex_sequence() = default;
+
+        xcomplex_sequence(const xcomplex_sequence&) = default;
+        xcomplex_sequence& operator=(const xcomplex_sequence&) = default;
+
+        xcomplex_sequence(xcomplex_sequence&&) = default;
+        xcomplex_sequence& operator=(xcomplex_sequence&&) = default;
+
+        // Parallel storage for real and imaginary components.
+        container_type m_real;
+        container_type m_imag;
+    };
+
+    // Equality comparison between sequences of identical container/flag type.
+    template <class C, bool B>
+    bool operator==(const xcomplex_sequence<C, B>& lhs, const xcomplex_sequence<C, B>& rhs);
+
+    template <class C, bool B>
+    bool operator!=(const xcomplex_sequence<C, B>& lhs, const xcomplex_sequence<C, B>& rhs);
+
+    /******************
+     * xcomplex_array *
+     ******************/
+
+    // Fixed-size complex sequence backed by two std::array<T, N>. Thin
+    // derived type that merely re-exposes the protected base constructors.
+    template <class T, std::size_t N, bool ieee_compliant = false>
+    class xcomplex_array : public xcomplex_sequence<std::array<T, N>, ieee_compliant>
+    {
+    public:
+
+        using base_type = xcomplex_sequence<std::array<T, N>, ieee_compliant>;
+        using value_type = typename base_type::value_type;
+        using size_type = typename base_type::size_type;
+
+        xcomplex_array() = default;
+        xcomplex_array(size_type s);
+        xcomplex_array(size_type s, const value_type& v);
+
+        template <class TR, class TI, bool B>
+        xcomplex_array(size_type s, const xcomplex<TR, TI, B>& v);
+    };
+
+    /*******************
+     * xcomplex_vector *
+     *******************/
+
+    // Dynamically-sized complex sequence backed by two std::vector<T, A>.
+    // Adds resize overloads on top of the base interface.
+    template <class T, bool ieee_compliant = false, class A = std::allocator<T>>
+    class xcomplex_vector : public xcomplex_sequence<std::vector<T, A>, ieee_compliant>
+    {
+    public:
+
+        using base_type = xcomplex_sequence<std::vector<T, A>, ieee_compliant>;
+        using value_type = typename base_type::value_type;
+        using size_type = typename base_type::size_type;
+
+        xcomplex_vector() = default;
+        xcomplex_vector(size_type s);
+        xcomplex_vector(size_type s, const value_type& v);
+        xcomplex_vector(std::initializer_list<value_type> init);
+
+        template <class TR, class TI, bool B>
+        xcomplex_vector(size_type s, const xcomplex<TR, TI, B>& v);
+
+        void resize(size_type);
+        void resize(size_type, const value_type&);
+        template <class TR, class TI, bool B>
+        void resize(size_type s, const xcomplex<TR, TI, B>& v);
+    };
+
+    /*********************
+     * xcomplex_iterator *
+     *********************/
+
+    // Traits consumed by xrandom_access_iterator_base2: the iterator's
+    // reference type is a proxy xcomplex over the underlying iterator's
+    // references, and pointer is the matching closure wrapper.
+    template <class IT, bool ieee_compliant>
+    struct xcomplex_iterator_traits
+    {
+        using iterator_type = xcomplex_iterator<IT, ieee_compliant>;
+        using value_type = xcomplex<typename IT::value_type, typename IT::value_type, ieee_compliant>;
+        using reference = xcomplex<typename IT::reference, typename IT::reference, ieee_compliant>;
+        using pointer = xclosure_pointer<reference>;
+        using difference_type = typename IT::difference_type;
+    };
+
+    // Random-access proxy iterator over an xcomplex_sequence: advances two
+    // underlying iterators (real and imaginary) in lockstep; the base class
+    // synthesizes the remaining operators (post-inc/dec, +, -, <, etc.)
+    // from the members declared here.
+    template <class IT, bool B>
+    class xcomplex_iterator : public xrandom_access_iterator_base2<xcomplex_iterator_traits<IT, B>>
+    {
+    public:
+
+        using self_type = xcomplex_iterator<IT, B>;
+        using base_type = xrandom_access_iterator_base2<xcomplex_iterator_traits<IT, B>>;
+
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using pointer = typename base_type::pointer;
+        using difference_type = typename base_type::difference_type;
+
+        xcomplex_iterator() = default;
+        xcomplex_iterator(IT it_real, IT it_imag);
+
+        self_type& operator++();
+        self_type& operator--();
+
+        self_type& operator+=(difference_type n);
+        self_type& operator-=(difference_type n);
+
+        difference_type operator-(const self_type& rhs) const;
+
+        reference operator*() const;
+        pointer operator->() const;
+
+        bool operator==(const self_type& rhs) const;
+
+    private:
+
+        // Paired cursors into the real and imaginary containers.
+        IT m_it_real;
+        IT m_it_imag;
+    };
+
+    /************************************
+     * xcomplex_sequence implementation *
+     ************************************/
+
+    /************************************
+     * xcomplex_sequence implementation *
+     ************************************/
+
+    // Constructors build the two part-containers via make_sequence, which
+    // abstracts over fixed-size (std::array) and resizable containers.
+    template <class C, bool B>
+    inline xcomplex_sequence<C, B>::xcomplex_sequence(size_type s)
+        : m_real(make_sequence<container_type>(s)),
+          m_imag(make_sequence<container_type>(s))
+    {
+    }
+
+    template <class C, bool B>
+    inline xcomplex_sequence<C, B>::xcomplex_sequence(size_type s, const value_type& v)
+        : m_real(make_sequence<container_type>(s, v.real())),
+          m_imag(make_sequence<container_type>(s, v.imag()))
+    {
+    }
+
+    template <class C, bool B>
+    template <class TR, class TC, bool B2>
+    inline xcomplex_sequence<C, B>::xcomplex_sequence(size_type s, const xcomplex<TR, TC, B2>& v)
+        : m_real(make_sequence<container_type>(s, v.real())),
+          m_imag(make_sequence<container_type>(s, v.imag()))
+    {
+    }
+
+    // Initializer-list constructor: splits the complex values into the two
+    // part containers with a transform per component.
+    template <class C, bool B>
+    inline xcomplex_sequence<C, B>::xcomplex_sequence(std::initializer_list<value_type> init)
+        : m_real(make_sequence<container_type>(init.size())),
+          m_imag(make_sequence<container_type>(init.size()))
+    {
+        std::transform(init.begin(), init.end(), m_real.begin(), [](const auto& v) { return v.real(); });
+        std::transform(init.begin(), init.end(), m_imag.begin(), [](const auto& v) { return v.imag(); });
+    }
+
+    // Size queries delegate to m_real (m_imag is kept the same length by
+    // construction).
+    template <class C, bool B>
+    inline bool xcomplex_sequence<C, B>::empty() const noexcept
+    {
+        return m_real.empty();
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::size() const noexcept -> size_type
+    {
+        return m_real.size();
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::max_size() const noexcept -> size_type
+    {
+        return m_real.max_size();
+    }
+
+    // Element access builds a proxy xcomplex referencing the i-th entry of
+    // each part container; at() inherits bounds checking from the container.
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::at(size_type i) -> reference
+    {
+        return reference(m_real.at(i), m_imag.at(i));
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::at(size_type i) const -> const_reference
+    {
+        return const_reference(m_real.at(i), m_imag.at(i));
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::operator[](size_type i) -> reference
+    {
+        return reference(m_real[i], m_imag[i]);
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::operator[](size_type i) const -> const_reference
+    {
+        return const_reference(m_real[i], m_imag[i]);
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::front() -> reference
+    {
+        return reference(m_real.front(), m_imag.front());
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::front() const -> const_reference
+    {
+        return const_reference(m_real.front(), m_imag.front());
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::back() -> reference
+    {
+        return reference(m_real.back(), m_imag.back());
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::back() const -> const_reference
+    {
+        return const_reference(m_real.back(), m_imag.back());
+    }
+
+    // Iterators are zip-style: each xcomplex_iterator advances a real-part
+    // iterator and an imaginary-part iterator in lockstep. Const begin()/end()
+    // simply forward to cbegin()/cend().
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::begin() noexcept -> iterator
+    {
+        return iterator(m_real.begin(), m_imag.begin());
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::end() noexcept -> iterator
+    {
+        return iterator(m_real.end(), m_imag.end());
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::begin() const noexcept -> const_iterator
+    {
+        return cbegin();
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::end() const noexcept -> const_iterator
+    {
+        return cend();
+    }
+    
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::cbegin() const noexcept -> const_iterator
+    {
+        return const_iterator(m_real.cbegin(), m_imag.cbegin());
+    }
+    
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::cend() const noexcept -> const_iterator
+    {
+        return const_iterator(m_real.cend(), m_imag.cend());
+    }
+
+    // Reverse iteration pairs the underlying containers' reverse iterators;
+    // note reverse_iterator here is an xcomplex_iterator over them, not
+    // std::reverse_iterator over the forward xcomplex_iterator.
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::rbegin() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(m_real.rbegin(), m_imag.rbegin());
+    }
+    
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::rend() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(m_real.rend(), m_imag.rend());
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::rbegin() const noexcept -> const_reverse_iterator
+    {
+        return crbegin();
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::rend() const noexcept -> const_reverse_iterator
+    {
+        return crend();
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::crbegin() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(m_real.crbegin(), m_imag.crbegin());
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::crend() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(m_real.crend(), m_imag.crend());
+    }
+
+    // Rvalue-qualified accessor: *this is expiring, so move the real-part
+    // container out instead of copying it. The original `return m_real;`
+    // performed a full copy, which can allocate — at odds with the noexcept
+    // specification; a container move is the intended (and noexcept) path.
+    // (std::move requires <utility>, already in scope via this header's
+    // includes — confirm if the include set changes.)
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::real() && noexcept -> container_type
+    {
+        return std::move(m_real);
+    }
+
+    // Lvalue accessors expose the real-part container directly (mutable and
+    // const views of m_real).
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::real() & noexcept -> container_type&
+    {
+        return m_real;
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::real() const & noexcept -> const container_type&
+    {
+        return m_real;
+    }
+
+    // Rvalue-qualified accessor: move the imaginary-part container out of the
+    // expiring object. The original `return m_imag;` copied the container,
+    // which can allocate despite the noexcept specification; moving matches
+    // the && qualifier's intent and the real() && counterpart.
+    // (std::move requires <utility>, already in scope via this header's
+    // includes — confirm if the include set changes.)
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::imag() && noexcept -> container_type
+    {
+        return std::move(m_imag);
+    }
+
+    // Lvalue accessors expose the imaginary-part container directly (mutable
+    // and const views of m_imag).
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::imag() & noexcept -> container_type&
+    {
+        return m_imag;
+    }
+
+    template <class C, bool B>
+    inline auto xcomplex_sequence<C, B>::imag() const & noexcept -> const container_type&
+    {
+        return m_imag;
+    }
+
+    // Two sequences are equal iff both their real-part and imaginary-part
+    // containers compare equal element-wise.
+    template <class C, bool B>
+    inline bool operator==(const xcomplex_sequence<C, B>& lhs, const xcomplex_sequence<C, B>& rhs)
+    {
+        return lhs.real() == rhs.real() && lhs.imag() == rhs.imag();
+    }
+
+    template <class C, bool B>
+    inline bool operator!=(const xcomplex_sequence<C, B>& lhs, const xcomplex_sequence<C, B>& rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    /*********************************
+     * xcomplex_array implementation *
+     *********************************/
+
+    // Fixed-size specialization: all constructors simply forward to the
+    // xcomplex_sequence base.
+    template <class T, std::size_t N, bool B>
+    inline xcomplex_array<T, N, B>::xcomplex_array(size_type s)
+        : base_type(s)
+    {
+    }
+    
+    template <class T, std::size_t N, bool B>
+    inline xcomplex_array<T, N, B>::xcomplex_array(size_type s, const value_type& v)
+        : base_type(s, v)
+    {
+    }
+
+    template <class T, std::size_t N, bool B>
+    template <class TR, class TI, bool B2>
+    inline xcomplex_array<T, N, B>::xcomplex_array(size_type s, const xcomplex<TR, TI, B2>& v)
+        : base_type(s, v)
+    {
+    }
+
+    /**********************************
+     * xcomplex_vector implementation *
+     **********************************/
+
+    // Dynamic-size specialization: constructors forward to xcomplex_sequence;
+    // resize() grows/shrinks both component containers together so they stay
+    // the same length.
+    template <class T, bool B, class A>
+    inline xcomplex_vector<T, B, A>::xcomplex_vector(size_type s)
+        : base_type(s)
+    {
+    }
+
+    template <class T, bool B, class A>
+    inline xcomplex_vector<T, B, A>::xcomplex_vector(size_type s, const value_type& v)
+        : base_type(s, v)
+    {
+    }
+
+    template <class T, bool B, class A>
+    template <class TR, class TI, bool B2>
+    inline xcomplex_vector<T, B, A>::xcomplex_vector(size_type s, const xcomplex<TR, TI, B2>& v)
+        : base_type(s, v)
+    {
+    }
+
+    template <class T, bool B, class A>
+    inline xcomplex_vector<T, B, A>::xcomplex_vector(std::initializer_list<value_type> init)
+        : base_type(init)
+    {
+    }
+
+    template <class T, bool B, class A>
+    void xcomplex_vector<T, B, A>::resize(size_type s)
+    {
+        this->m_real.resize(s);
+        this->m_imag.resize(s);
+    }
+
+    // Fill variants: new elements get v's real/imaginary components.
+    template <class T, bool B, class A>
+    void xcomplex_vector<T, B, A>::resize(size_type s, const value_type& v)
+    {
+        this->m_real.resize(s, v.real());
+        this->m_imag.resize(s, v.imag());
+    }
+
+    template <class T, bool B, class A>
+    template <class TR, class TI, bool B2>
+    inline void xcomplex_vector<T, B, A>::resize(size_type s, const xcomplex<TR, TI, B2>& v)
+    {
+        this->m_real.resize(s, v.real());
+        this->m_imag.resize(s, v.imag());
+    }
+
+    /************************************
+     * xcomplex_iterator implementation *
+     ************************************/
+
+    // Zip iterator over a (real, imag) pair of underlying iterators; both are
+    // advanced in lockstep by every navigation operation.
+    template <class IT, bool B>
+    inline xcomplex_iterator<IT, B>::xcomplex_iterator(IT it_real, IT it_imag)
+        : m_it_real(it_real), m_it_imag(it_imag)
+    {
+    }
+
+    template <class IT, bool B>
+    inline auto xcomplex_iterator<IT, B>::operator++() -> self_type&
+    {
+        ++m_it_real;
+        ++m_it_imag;
+        return *this;
+    }
+
+    template <class IT, bool B>
+    inline auto xcomplex_iterator<IT, B>::operator--() -> self_type&
+    {
+        --m_it_real;
+        --m_it_imag;
+        return *this;
+    }
+
+    template <class IT, bool B>
+    inline auto xcomplex_iterator<IT, B>::operator+=(difference_type n) -> self_type&
+    {
+        m_it_real += n;
+        m_it_imag += n;
+        return *this;
+    }
+
+    template <class IT, bool B>
+    inline auto xcomplex_iterator<IT, B>::operator-=(difference_type n) -> self_type&
+    {
+        m_it_real -= n;
+        m_it_imag -= n;
+        return *this;
+    }
+    
+    // Distance is measured on the real-part iterator only; the imaginary-part
+    // iterator is always the same offset, so one measurement suffices.
+    template <class IT, bool B>
+    inline auto xcomplex_iterator<IT, B>::operator-(const self_type& rhs) const -> difference_type
+    {
+        return m_it_real - rhs.m_it_real;
+    }
+
+    // Dereferencing yields a proxy reference over the two components, and
+    // operator-> wraps that proxy in a closure pointer.
+    template <class IT, bool B>
+    inline auto xcomplex_iterator<IT, B>::operator*() const -> reference
+    {
+        return reference(*m_it_real, *m_it_imag);
+    }
+
+    template <class IT, bool B>
+    inline auto xcomplex_iterator<IT, B>::operator->() const -> pointer
+    {
+        return pointer(operator*());
+    }
+
+    template <class IT, bool B>
+    inline bool xcomplex_iterator<IT, B>::operator==(const self_type& rhs) const
+    {
+        return m_it_real == rhs.m_it_real && m_it_imag == rhs.m_it_imag;
+    }
+}
+
+#endif

+ 1356 - 0
3rd/numpy/include/xtl/xdynamic_bitset.hpp

@@ -0,0 +1,1356 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XDYNAMIC_BITSET_HPP
+#define XDYNAMIC_BITSET_HPP
+
+#include <climits>
+#include <type_traits>
+#include <vector>
+#include <initializer_list>
+#include <iterator>
+#include <memory>
+#include <algorithm>
+
+#include "xclosure.hpp"
+#include "xspan.hpp"
+#include "xiterator_base.hpp"
+#include "xtype_traits.hpp"
+
+namespace xtl
+{
+    template <class B, bool is_const>
+    class xbitset_reference;
+
+    template <class B, bool is_const>
+    class xbitset_iterator;
+
+    /*******************
+     * xdynamic_bitset *
+     *******************/
+
+    template <class B>
+    class xdynamic_bitset_base;
+
+    template <class B, class A>
+    class xdynamic_bitset;
+
+    template <class X>
+    class xdynamic_bitset_view;
+
+    // Traits mapping each bitset derived class to its block storage:
+    // owning xdynamic_bitset uses std::vector, the view uses a non-owning span.
+    template <class X>
+    struct xdynamic_bitset_traits;
+
+    template <class B, class A>
+    struct xdynamic_bitset_traits<xdynamic_bitset<B, A>>
+    {
+        using storage_type = std::vector<B, A>;
+        using block_type = typename storage_type::value_type;
+    };
+
+    template <class X>
+    struct xdynamic_bitset_traits<xdynamic_bitset_view<X>>
+    {
+        using storage_type = xtl::span<X>;
+        using block_type = typename storage_type::value_type;
+    };
+
+    // Uniform access to value/allocator/size/difference types of the two
+    // storage kinds; blocks must be scalar integral-like types.
+    template <class X>
+    struct container_internals;
+
+    template <class X>
+    struct container_internals<xtl::span<X>>
+    {
+        using value_type = typename xtl::span<X>::value_type;
+        static_assert(xtl::is_scalar<value_type>::value, "");
+        using allocator_type = std::allocator<value_type>;
+        using size_type = std::size_t;
+        using difference_type = typename xtl::span<X>::difference_type;
+    };
+
+    template <class X, class A>
+    struct container_internals<std::vector<X, A>>
+    {
+        using value_type = X;
+        static_assert(xtl::is_scalar<value_type>::value, "");
+        using allocator_type = A;
+        using size_type = typename std::vector<X>::size_type;
+        using difference_type = typename std::vector<X>::difference_type;
+    };
+
+    // CRTP base implementing the shared bitset interface for both the owning
+    // xdynamic_bitset and the non-owning xdynamic_bitset_view. State is
+    // m_size (number of bits) plus m_buffer (block storage from the traits).
+    template <class B>
+    class xdynamic_bitset_base
+    {
+    public:
+
+        using self_type = xdynamic_bitset_base<B>;
+        using derived_class = B;
+
+        using storage_type = typename xdynamic_bitset_traits<B>::storage_type;
+        using block_type = typename xdynamic_bitset_traits<B>::block_type;
+        // Owning type used for results of operators that must allocate (<<, >>).
+        using temporary_type = xdynamic_bitset<block_type, std::allocator<block_type>>;
+
+        using allocator_type = typename container_internals<storage_type>::allocator_type;
+        using value_type = bool;
+        // Element access goes through proxy references (bits are packed).
+        using reference = xbitset_reference<derived_class, false>;
+        using const_reference = xbitset_reference<derived_class, true>;
+
+        using pointer = typename reference::pointer;
+        using const_pointer = typename const_reference::pointer;
+        using size_type = typename container_internals<storage_type>::size_type;
+        using difference_type = typename storage_type::difference_type;
+        using iterator = xbitset_iterator<derived_class, false>;
+        using const_iterator = xbitset_iterator<derived_class, true>;
+        using reverse_iterator = std::reverse_iterator<iterator>;
+        using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+        using const_block_iterator = typename storage_type::const_iterator;
+
+        bool empty() const noexcept;
+        size_type size() const noexcept;
+
+        void swap(self_type& rhs);
+
+        reference at(size_type i);
+        const_reference at(size_type i) const;
+
+        reference operator[](size_type i);
+        const_reference operator[](size_type i) const;
+
+        reference front();
+        const_reference front() const;
+
+        reference back();
+        const_reference back() const;
+
+        // Bit-wise iteration (proxy references) ...
+        iterator begin() noexcept;
+        iterator end() noexcept;
+
+        const_iterator begin() const noexcept;
+        const_iterator end() const noexcept;
+
+        const_iterator cbegin() const noexcept;
+        const_iterator cend() const noexcept;
+
+        reverse_iterator rbegin() noexcept;
+        reverse_iterator rend() noexcept;
+
+        const_reverse_iterator rbegin() const noexcept;
+        const_reverse_iterator rend() const noexcept;
+
+        const_reverse_iterator crbegin() const noexcept;
+        const_reverse_iterator crend() const noexcept;
+
+        // ... and block-wise iteration over the raw storage.
+        const_block_iterator block_begin() const noexcept;
+        const_block_iterator block_end() const noexcept;
+
+        // Bit-wise combination with any other bitset derived class.
+        template <class R>
+        self_type& operator&=(const xdynamic_bitset_base<R>& rhs);
+        template <class R>
+        self_type& operator|=(const xdynamic_bitset_base<R>& rhs);
+        template <class R>
+        self_type& operator^=(const xdynamic_bitset_base<R>& rhs);
+
+        // Shifts return an owning temporary; compound forms shift in place.
+        temporary_type operator<<(size_type pos);
+        self_type& operator<<=(size_type pos);
+        temporary_type operator>>(size_type pos);
+        self_type& operator>>=(size_type pos);
+
+        self_type& set();
+        self_type& set(size_type pos, value_type value = true);
+
+        self_type& reset();
+        self_type& reset(size_type pos);
+
+        self_type& flip();
+        self_type& flip(size_type pos);
+
+        bool all() const noexcept;
+        bool any() const noexcept;
+        bool none() const noexcept;
+        size_type count() const noexcept;
+
+        size_type block_count() const noexcept;
+        block_type* data() noexcept;
+        const block_type* data() const noexcept;
+
+        template <class Y>
+        bool operator==(const xdynamic_bitset_base<Y>& rhs) const noexcept;
+        template <class Y>
+        bool operator!=(const xdynamic_bitset_base<Y>& rhs) const noexcept;
+
+        derived_class& derived_cast();
+        const derived_class& derived_cast() const;
+
+    protected:
+
+        // Constructible only by derived classes (CRTP); dtor is protected and
+        // non-virtual, so deletion through the base is not supported.
+        xdynamic_bitset_base(const storage_type& buffer, std::size_t size);
+
+        ~xdynamic_bitset_base() = default;
+        xdynamic_bitset_base(const xdynamic_bitset_base& rhs) = default;
+        xdynamic_bitset_base(xdynamic_bitset_base&& rhs) = default;
+        xdynamic_bitset_base& operator=(const xdynamic_bitset_base& rhs) = default;
+        xdynamic_bitset_base& operator=(xdynamic_bitset_base&& rhs) = default;
+
+        size_type m_size;
+        storage_type m_buffer;
+
+        static constexpr std::size_t s_bits_per_block = CHAR_BIT * sizeof(block_type);
+
+        // Block/bit arithmetic helpers; zero_unused_bits clears the padding
+        // bits of the last block so whole-block comparisons stay valid.
+        size_type compute_block_count(size_type bits_count) const noexcept;
+        size_type block_index(size_type pos) const noexcept;
+        size_type bit_index(size_type pos) const noexcept;
+        block_type bit_mask(size_type pos) const noexcept;
+        size_type count_extra_bits() const noexcept;
+        void zero_unused_bits();
+    private:
+
+        // Make views and buffers friends
+        template<typename BB>
+        friend class xdynamic_bitset_base;
+    };
+
+    // Non-owning bitset over an external block buffer. NOTE: construction
+    // ZEROS out the unused trailing bits of the last block in the viewed
+    // memory (see the constructor below) — the underlying buffer is modified.
+    template <class X>
+    class xdynamic_bitset_view
+        : public xdynamic_bitset_base<xdynamic_bitset_view<X>>
+    {
+    public:
+
+        using base_class = xdynamic_bitset_base<xdynamic_bitset_view<X>>;
+        using storage_type = typename base_class::storage_type;
+        using block_type = typename base_class::block_type;
+
+        xdynamic_bitset_view(block_type* ptr, std::size_t size);
+
+        xdynamic_bitset_view() = default;
+        ~xdynamic_bitset_view() = default;
+        xdynamic_bitset_view(const xdynamic_bitset_view& rhs) = default;
+        xdynamic_bitset_view(xdynamic_bitset_view&& rhs) = default;
+        xdynamic_bitset_view& operator=(const xdynamic_bitset_view& rhs) = default;
+        xdynamic_bitset_view& operator=(xdynamic_bitset_view&& rhs) = default;
+
+        // A view cannot actually resize; only a no-op "resize" to the current
+        // size is accepted (anything else reports an error — see definition).
+        void resize(std::size_t sz);
+    };
+
+    namespace detail_bitset
+    {
+        // Ceiling integer division: smallest m with m * div >= n.
+        // NOTE(review): n + div - 1 can wrap for n close to the maximum of T —
+        // callers pass bit counts, for which this is not expected in practice.
+        template <class T>
+        constexpr T integer_ceil(T n, T div)
+        {
+            return (n + div - T(1)) / div;
+        }
+    }
+
+    // Wraps `size` bits starting at `ptr`; the span covers just enough blocks
+    // to hold them. Padding bits of the last block are cleared in the viewed
+    // memory so block-wise operations behave like the owning bitset.
+    template <class X>
+    inline xdynamic_bitset_view<X>::xdynamic_bitset_view(block_type* ptr, std::size_t size)
+        : base_class(storage_type(ptr, detail_bitset::integer_ceil(size, base_class::s_bits_per_block)), size)
+    {
+        base_class::zero_unused_bits();
+    }
+
+    // Views are fixed-size: any size change is an error, reported either by
+    // exception or, with XTL_NO_EXCEPTIONS, by message + std::terminate.
+    template <class X>
+    inline void xdynamic_bitset_view<X>::resize(std::size_t sz)
+    {
+        if (sz != this->m_size) {
+#if defined(XTL_NO_EXCEPTIONS)
+            std::fprintf(stderr, "cannot resize bitset_view\n");
+            std::terminate();
+#else
+            throw std::runtime_error("cannot resize bitset_view");
+#endif
+        }
+    }
+
+    template <class B, class A = std::allocator<B>>
+    class xdynamic_bitset;
+
+    // Free bit-wise operators over any pair of bitset derived classes; each
+    // returns a new owning bitset (deduced via auto in the definitions).
+    template <class B>
+    auto operator~(const xdynamic_bitset_base<B>& lhs);
+
+    template <class L, class R>
+    auto operator&(const xdynamic_bitset_base<L>& lhs, const xdynamic_bitset_base<R>& rhs);
+
+    template <class L, class R>
+    auto operator|(const xdynamic_bitset_base<L>& lhs, const xdynamic_bitset_base<R>& rhs);
+
+    template <class L, class R>
+    auto operator^(const xdynamic_bitset_base<L>& lhs, const xdynamic_bitset_base<R>& rhs);
+
+    template <class B>
+    void swap(const xdynamic_bitset_base<B>& lhs, const xdynamic_bitset_base<B>& rhs);
+
+    /*********************
+     * xbitset_reference *
+     *********************/
+
+    // Proxy reference to a single bit: stores a reference to the containing
+    // block plus a precomputed single-bit mask. Mirrors the interface of
+    // std::bitset::reference (implicit bool conversion, assignment, flip).
+    template <class B, bool is_const>
+    class xbitset_reference
+    {
+    public:
+
+        using self_type = xbitset_reference<B, is_const>;
+        using pointer = std::conditional_t<is_const,
+                                           const xclosure_pointer<const self_type>,
+                                           xclosure_pointer<self_type>>;
+
+        operator bool() const noexcept;
+
+        xbitset_reference(const self_type&) = default;
+        xbitset_reference(self_type&&) = default;
+
+        // Assignment writes through to the referenced bit (not a rebind).
+        self_type& operator=(const self_type&) noexcept;
+        self_type& operator=(self_type&&) noexcept;
+        self_type& operator=(bool) noexcept;
+
+        bool operator~() const noexcept;
+
+        self_type& operator&=(bool) noexcept;
+        self_type& operator|=(bool) noexcept;
+        self_type& operator^=(bool) noexcept;
+        self_type& flip() noexcept;
+
+        // Address-of yields a closure pointer wrapping a copy of this proxy.
+        pointer operator&() noexcept;
+
+    private:
+
+        using block_type = typename xdynamic_bitset_traits<B>::block_type;
+        using closure_type = std::conditional_t<is_const, const block_type&, block_type&>;
+
+        // Only the bitset base may create proxies (see friend below).
+        xbitset_reference(closure_type block, block_type pos);
+
+        void assign(bool) noexcept;
+        void set() noexcept;
+        void reset() noexcept;
+
+        closure_type m_block;
+        const block_type m_mask;
+
+        template <class BO, bool is_const_other>
+        friend class xbitset_reference;
+
+        friend class xdynamic_bitset_base<B>;
+    };
+
+    /********************
+     * xbitset_iterator *
+     ********************/
+
+    // Random-access iterator over the bits of a bitset, implemented as a
+    // (container pointer, bit index) pair; dereferencing returns the proxy
+    // reference type rather than bool&.
+    template <class B, bool is_const>
+    class xbitset_iterator : public xrandom_access_iterator_base<xbitset_iterator<B, is_const>,
+                                                                 typename xdynamic_bitset_base<B>::value_type,
+                                                                 typename xdynamic_bitset_base<B>::difference_type,
+                                                                 std::conditional_t<is_const,
+                                                                                    typename xdynamic_bitset_base<B>::const_pointer,
+                                                                                    typename xdynamic_bitset_base<B>::pointer>,
+                                                                 std::conditional_t<is_const,
+                                                                                    typename xdynamic_bitset_base<B>::const_reference,
+                                                                                    typename xdynamic_bitset_base<B>::reference>>
+    {
+    public:
+
+        using self_type = xbitset_iterator<B, is_const>;
+        using container_type = xdynamic_bitset_base<B>;
+        using value_type = typename container_type::value_type;
+        using reference = std::conditional_t<is_const,
+                                             typename container_type::const_reference,
+                                             typename container_type::reference>;
+        using pointer = std::conditional_t<is_const,
+                                           typename container_type::const_pointer,
+                                           typename container_type::pointer>;
+        using size_type = typename container_type::size_type;
+        using difference_type = typename container_type::difference_type;
+        using base_type = xrandom_access_iterator_base<self_type, value_type, difference_type, pointer, reference>;
+
+        using container_reference = std::conditional_t<is_const, const container_type&, container_type&>;
+        using container_pointer = std::conditional_t<is_const, const container_type*, container_type*>;
+
+        xbitset_iterator() noexcept;
+        xbitset_iterator(container_reference c, size_type index) noexcept;
+
+        self_type& operator++();
+        self_type& operator--();
+
+        self_type& operator+=(difference_type n);
+        self_type& operator-=(difference_type n);
+
+        difference_type operator-(const self_type& rhs) const;
+
+        reference operator*() const;
+        pointer operator->() const;
+
+        // Remaining comparisons/arithmetic come from the CRTP iterator base.
+        bool operator==(const self_type& rhs) const;
+        bool operator<(const self_type& rhs) const;
+
+    private:
+
+        container_pointer p_container;
+        size_type m_index;
+    };
+
+    // Owning, resizable bitset storing bits packed into blocks of type B in a
+    // std::vector. Inherits the shared interface from xdynamic_bitset_base
+    // and adds allocation-aware operations (resize, push_back, reserve, ...).
+    template <class B, class Allocator>
+    class xdynamic_bitset
+        : public xdynamic_bitset_base<xdynamic_bitset<B, Allocator>>
+    {
+    public:
+
+        using allocator_type = Allocator;
+        using storage_type = std::vector<B, Allocator>;
+
+        using base_type = xdynamic_bitset_base<xdynamic_bitset<B, Allocator>>;
+        using self_type = xdynamic_bitset<B, Allocator>;
+        using block_type = B;
+
+        using reference = typename base_type::reference;
+        using const_reference = typename base_type::const_reference;
+
+        using pointer = typename reference::pointer;
+        using const_pointer = typename const_reference::pointer;
+        using size_type = typename storage_type::size_type;
+        using difference_type = typename storage_type::difference_type;
+        using iterator = xbitset_iterator<self_type, false>;
+        using const_iterator = xbitset_iterator<self_type, true>;
+        using reverse_iterator = std::reverse_iterator<iterator>;
+        using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+        using base_type::base_type;
+        using base_type::begin;
+        using base_type::cbegin;
+        using base_type::end;
+        using base_type::cend;
+        using base_type::rbegin;
+        using base_type::rend;
+        using base_type::size;
+
+        xdynamic_bitset();
+
+        explicit xdynamic_bitset(const allocator_type& allocator);
+
+        // count = number of bits; b = initial value of every bit.
+        xdynamic_bitset(size_type count, bool b, const allocator_type& alloc = allocator_type());
+        explicit xdynamic_bitset(size_type count, const allocator_type& alloc = allocator_type());
+        xdynamic_bitset(std::initializer_list<bool> init, const allocator_type& alloc = allocator_type());
+
+        // Iterator constructor consumes BLOCKS, not bits (see definition).
+        template <class BlockInputIt>
+        xdynamic_bitset(BlockInputIt first, BlockInputIt last, const allocator_type& alloc = allocator_type());
+
+        xdynamic_bitset(const xdynamic_bitset& rhs);
+
+        // Allow creation from views for e.g. temporary creation
+        template <class Y>
+        xdynamic_bitset(const xdynamic_bitset_base<Y>& rhs);
+
+        ~xdynamic_bitset() = default;
+        xdynamic_bitset(xdynamic_bitset&& rhs) = default;
+        xdynamic_bitset& operator=(const xdynamic_bitset& rhs) = default;
+        xdynamic_bitset& operator=(xdynamic_bitset&& rhs) = default;
+
+        void assign(size_type count, bool b);
+        template <class BlockInputIt>
+        void assign(BlockInputIt first, BlockInputIt last);
+        void assign(std::initializer_list<bool> init);
+
+        // Sizes/capacities are expressed in BITS (blocks * s_bits_per_block).
+        size_type max_size() const noexcept;
+        void reserve(size_type new_cap);
+        size_type capacity() const noexcept;
+
+        allocator_type get_allocator() const;
+
+        void resize(size_type size, bool b = false);
+        void clear() noexcept;
+        void push_back(bool b);
+        void pop_back();
+    };
+
+    /**********************************
+     * xdynamic_bitset implementation *
+     **********************************/
+
+    template <class B, class A>
+    inline xdynamic_bitset<B, A>::xdynamic_bitset()
+        : base_type(storage_type(), size_type(0))
+    {
+    }
+
+    template <class B, class A>
+    inline xdynamic_bitset<B, A>::xdynamic_bitset(const allocator_type& allocator)
+        : base_type(storage_type(allocator), size_type(0))
+    {
+    }
+
+    // Fill constructor: blocks are set to all-ones or all-zeros, then the
+    // padding bits of the last block are cleared.
+    template <class B, class A>
+    inline xdynamic_bitset<B, A>::xdynamic_bitset(size_type count, bool b, const allocator_type& alloc)
+        : base_type(storage_type(this->compute_block_count(count), b ? ~block_type(0) : block_type(0), alloc), count)
+    {
+        this->zero_unused_bits();
+    }
+
+    template <class B, class A>
+    inline xdynamic_bitset<B, A>::xdynamic_bitset(size_type count, const allocator_type& alloc)
+        : base_type(storage_type(this->compute_block_count(count), block_type(0), alloc), count)
+    {
+    }
+
+    // Block-iterator constructor: copies whole blocks; the resulting bit size
+    // is the block count times s_bits_per_block (no partial last block).
+    template <class B, class A>
+    template <class BlockInputIt>
+    inline xdynamic_bitset<B, A>::xdynamic_bitset(BlockInputIt first, BlockInputIt last, const allocator_type& alloc)
+        : base_type(storage_type(first, last, alloc), size_type(std::distance(first, last)) * base_type::s_bits_per_block)
+    {
+    }
+
+    template <class B, class A>
+    inline xdynamic_bitset<B, A>::xdynamic_bitset(std::initializer_list<bool> init, const allocator_type& alloc)
+        : xdynamic_bitset(init.size(), alloc)
+    {
+        std::copy(init.begin(), init.end(), begin());
+    }
+
+    // Copy/conversion constructors duplicate the block range; this also
+    // materializes a view into an owning bitset.
+    template <class B, class A>
+    inline xdynamic_bitset<B, A>::xdynamic_bitset(const xdynamic_bitset& rhs)
+        : base_type(storage_type(rhs.block_begin(), rhs.block_end()), rhs.size())
+    {
+    }
+
+    template <class B, class A>
+    template <class Y>
+    inline xdynamic_bitset<B, A>::xdynamic_bitset(const xdynamic_bitset_base<Y>& rhs)
+        : base_type(storage_type(rhs.block_begin(), rhs.block_end()), rhs.size())
+    {
+    }
+
+    template <class B, class A>
+    inline void xdynamic_bitset<B, A>::assign(size_type count, bool b)
+    {
+        resize(count);
+        b ? this->set() : this->reset();
+    }
+
+    // Block-iterator assign: like the matching constructor, operates on
+    // whole blocks and sets the bit size to a multiple of s_bits_per_block.
+    template <class B, class A>
+    template <class BlockInputIt>
+    inline void xdynamic_bitset<B, A>::assign(BlockInputIt first, BlockInputIt last)
+    {
+        resize(size_type(std::distance(first, last)) * base_type::s_bits_per_block);
+        std::copy(first, last, this->m_buffer.begin());
+    }
+
+    template <class B, class A>
+    inline void xdynamic_bitset<B, A>::assign(std::initializer_list<bool> init)
+    {
+        resize(init.size());
+        std::copy(init.begin(), init.end(), begin());
+    }
+
+    template <class B, class A>
+    inline auto xdynamic_bitset<B, A>::get_allocator() const -> allocator_type
+    {
+        return base_type::m_buffer.get_allocator();
+    }
+
+    // Size queries report the logical number of bits (m_size), independent of
+    // how many blocks are allocated in m_buffer.
+    template <class B>
+    inline bool xdynamic_bitset_base<B>::empty() const noexcept
+    {
+        return m_size == 0;
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::size() const noexcept -> size_type
+    {
+        return m_size;
+    }
+
+    // Capacity figures convert the block vector's counts into bit counts.
+    template <class B, class A>
+    inline auto xdynamic_bitset<B, A>::max_size() const noexcept -> size_type
+    {
+        return base_type::m_buffer.max_size() * base_type::s_bits_per_block;
+    }
+
+    template <class B, class A>
+    inline void xdynamic_bitset<B, A>::reserve(size_type new_cap)
+    {
+        base_type::m_buffer.reserve(this->compute_block_count(new_cap));
+    }
+
+    template <class B, class A>
+    inline auto xdynamic_bitset<B, A>::capacity() const noexcept -> size_type
+    {
+        return base_type::m_buffer.capacity() * base_type::s_bits_per_block;
+    }
+
+    // Resize to `asize` bits; new bits take value b. When growing with
+    // b == true, the former padding bits of the old last block must be set
+    // explicitly (vector::resize only fills newly appended blocks). The
+    // trailing padding of the new last block is cleared at the end.
+    template <class B, class A>
+    inline void xdynamic_bitset<B, A>::resize(size_type asize, bool b)
+    {
+        size_type old_block_count = base_type::block_count();
+        size_type new_block_count = base_type::compute_block_count(asize);
+        block_type value = b ? ~block_type(0) : block_type(0);
+
+        if (new_block_count != old_block_count)
+        {
+            base_type::m_buffer.resize(new_block_count, value);
+        }
+
+        if (b && asize > base_type::m_size)
+        {
+            // count_extra_bits() still refers to the OLD m_size here; if the
+            // old size did not fill its last block, raise those bits too.
+            size_type extra_bits = base_type::count_extra_bits();
+            if (extra_bits > 0)
+            {
+                base_type::m_buffer[old_block_count - 1] |= (value << extra_bits);
+            }
+        }
+
+        base_type::m_size = asize;
+        base_type::zero_unused_bits();
+    }
+
+    template <class B, class A>
+    inline void xdynamic_bitset<B, A>::clear() noexcept
+    {
+        base_type::m_buffer.clear();
+        base_type::m_size = size_type(0);
+    }
+
+    template <class B, class A>
+    inline void xdynamic_bitset<B, A>::push_back(bool b)
+    {
+        size_type s = size();
+        resize(s + 1);
+        this->set(s, b);
+    }
+
+    // Precondition (as with std::vector): the bitset is non-empty.
+    template <class B, class A>
+    inline void xdynamic_bitset<B, A>::pop_back()
+    {
+        size_type old_block_count = base_type::m_buffer.size();
+        size_type new_block_count = base_type::compute_block_count(base_type::m_size - 1);
+
+        if (new_block_count != old_block_count)
+        {
+            base_type::m_buffer.pop_back();
+        }
+
+        --base_type::m_size;
+        base_type::zero_unused_bits();
+    }
+
+    template <class B>
+    inline void xdynamic_bitset_base<B>::swap(self_type& rhs)
+    {
+        using std::swap;
+        swap(m_buffer, rhs.m_buffer);
+        swap(m_size, rhs.m_size);
+    }
+
+    // Checked access to bit i; range checking currently piggybacks on
+    // m_buffer.at, so it validates the block index, not the exact bit index.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::at(size_type i) -> reference
+    {
+        // TODO add real check, remove m_buffer.at ...
+        return reference(m_buffer.at(block_index(i)), bit_index(i));
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::at(size_type i) const -> const_reference
+    {
+        // TODO add real check, remove m_buffer.at ...
+        return const_reference(m_buffer.at(block_index(i)), bit_index(i));
+    }
+
+    // Unchecked access: returns a proxy reference to bit i.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::operator[](size_type i) -> reference
+    {
+        return reference(m_buffer[block_index(i)], bit_index(i));
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::operator[](size_type i) const -> const_reference
+    {
+        return const_reference(m_buffer[block_index(i)], bit_index(i));
+    }
+
+    // First bit. Precondition (unchecked): the bitset is not empty.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::front() -> reference
+    {
+        return (*this)[0];
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::front() const -> const_reference
+    {
+        return (*this)[0];
+    }
+
+    // Last bit. Precondition (unchecked): the bitset is not empty.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::back() -> reference
+    {
+        return (*this)[m_size - 1];
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::back() const -> const_reference
+    {
+        return (*this)[m_size - 1];
+    }
+
+    // Bit-level iterators. These are proxy iterators (xbitset_iterator);
+    // dereferencing yields an xbitset_reference proxy, not a plain bool&.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::begin() noexcept -> iterator
+    {
+        return iterator(*this, size_type(0));
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::end() noexcept -> iterator
+    {
+        return iterator(*this, size());
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::begin() const noexcept -> const_iterator
+    {
+        return cbegin();
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::end() const noexcept -> const_iterator
+    {
+        return cend();
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::cbegin() const noexcept -> const_iterator
+    {
+        return const_iterator(*this, size_type(0));
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::cend() const noexcept -> const_iterator
+    {
+        return const_iterator(*this, size());
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::rbegin() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(end());
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::rend() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(begin());
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::rbegin() const noexcept -> const_reverse_iterator
+    {
+        return crbegin();
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::rend() const noexcept -> const_reverse_iterator
+    {
+        return crend();
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::crbegin() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(cend());
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::crend() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(cbegin());
+    }
+
+    // Block-level iterators: iterate over the raw storage blocks rather than
+    // over individual bits.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::block_begin() const noexcept -> const_block_iterator
+    {
+        return m_buffer.begin();
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::block_end() const noexcept -> const_block_iterator
+    {
+        return m_buffer.end();
+    }
+
+    // In-place bitwise AND with another bitset.
+    // Precondition (unchecked): rhs holds at least block_count() blocks;
+    // neither size validation nor size adjustment is performed.
+    template <class B>
+    template <class R>
+    inline auto xdynamic_bitset_base<B>::operator&=(const xdynamic_bitset_base<R>& rhs) -> self_type&
+    {
+        size_type size = block_count();
+        for (size_type i = 0; i < size; ++i)
+        {
+            m_buffer[i] &= rhs.m_buffer[i];
+        }
+        return *this;
+    }
+
+    // In-place bitwise OR with another bitset. Same precondition as operator&=.
+    template <class B>
+    template <class R>
+    inline auto xdynamic_bitset_base<B>::operator|=(const xdynamic_bitset_base<R>& rhs) -> self_type&
+    {
+        size_type size = block_count();
+        for (size_type i = 0; i < size; ++i)
+        {
+            m_buffer[i] |= rhs.m_buffer[i];
+        }
+        return *this;
+    }
+
+    // In-place bitwise XOR with another bitset. Same precondition as operator&=.
+    template <class B>
+    template <class R>
+    inline auto xdynamic_bitset_base<B>::operator^=(const xdynamic_bitset_base<R>& rhs) -> self_type&
+    {
+        size_type size = block_count();
+        for (size_type i = 0; i < size; ++i)
+        {
+            m_buffer[i] ^= rhs.m_buffer[i];
+        }
+        return *this;
+    }
+
+    // Returns a copy of this bitset shifted left by `pos` bits; the original
+    // is left untouched. Implemented on top of operator<<=.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::operator<<(size_type pos) -> temporary_type
+    {
+        temporary_type shifted(this->derived_cast());
+        shifted <<= pos;
+        return shifted;
+    }
+
+    // In-place left shift (toward higher bit indices).
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::operator<<=(size_type pos) -> self_type&
+    {
+        // Shifting by the full width (or more) clears every bit.
+        if (pos >= m_size)
+        {
+            return reset();
+        }
+
+        if (pos > 0)
+        {
+            size_type last = block_count() - 1;
+            size_type div = pos / s_bits_per_block;  // whole-block part of the shift
+            size_type r = bit_index(pos);            // intra-block part of the shift
+            block_type* b = &m_buffer[0];
+
+            if (r != 0)
+            {
+                size_type rs = s_bits_per_block - r;
+                // Walk from the highest block downward so every source block
+                // is read before it is overwritten.
+                for (size_type i = last - div; i > 0; --i)
+                {
+                    b[i + div] = (b[i] << r) | (b[i - 1] >> rs);
+                }
+                b[div] = b[0] << r;
+            }
+            else
+            {
+                // Block-aligned shift: move whole blocks, no bit splicing.
+                for (size_type i = last - div; i > 0; --i)
+                {
+                    b[i + div] = b[i];
+                }
+                b[div] = b[0];
+            }
+
+            // The vacated low-order blocks become zero.
+            std::fill_n(m_buffer.begin(), div, block_type(0));
+            zero_unused_bits();
+        }
+        return *this;
+    }
+
+    // Returns a copy of this bitset shifted right by `pos` bits; the original
+    // is left untouched. Implemented on top of operator>>=.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::operator>>(size_type pos) -> temporary_type
+    {
+        temporary_type shifted(this->derived_cast());
+        shifted >>= pos;
+        return shifted;
+    }
+
+    // In-place right shift (toward lower bit indices).
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::operator>>=(size_type pos) -> self_type&
+    {
+        // Shifting by the full width (or more) clears every bit.
+        if (pos >= m_size)
+        {
+            return reset();
+        }
+
+        if (pos > 0)
+        {
+            size_type last = block_count() - 1;
+            size_type div = pos / s_bits_per_block;  // whole-block part of the shift
+            size_type r = bit_index(pos);            // intra-block part of the shift
+            block_type* b = &m_buffer[0];
+
+            if (r != 0)
+            {
+                size_type ls = s_bits_per_block - r;
+                // Walk from the lowest block upward so every source block is
+                // read before it is overwritten.
+                for (size_type i = div; i < last; ++i)
+                {
+                    b[i - div] = (b[i] >> r) | (b[i + 1] << ls);
+                }
+                // The top block has no neighbor above; its unused bits are
+                // already zero, so a plain shift suffices.
+                b[last - div] = b[last] >> r;
+            }
+            else
+            {
+                // Block-aligned shift: move whole blocks, no bit splicing.
+                for (size_type i = div; i <= last; ++i)
+                {
+                    b[i - div] = b[i];
+                }
+            }
+
+            // The vacated high-order blocks become zero.
+            std::fill_n(m_buffer.begin() + static_cast<std::ptrdiff_t>(block_count() - div), div, block_type(0));
+        }
+        return *this;
+    }
+
+    // Sets every bit to 1, then re-clears the unused bits of the last block.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::set() -> self_type&
+    {
+        std::fill(m_buffer.begin(), m_buffer.end(), ~block_type(0));
+        zero_unused_bits();
+        return *this;
+    }
+
+    // Sets bit `pos` to `value`. Precondition (unchecked): pos < size().
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::set(size_type pos, value_type value) -> self_type&
+    {
+        if (value)
+        {
+            m_buffer[block_index(pos)] |= bit_mask(pos);
+        }
+        else
+        {
+            reset(pos);
+        }
+        return *this;
+    }
+
+    // Clears every bit.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::reset() -> self_type&
+    {
+        std::fill(m_buffer.begin(), m_buffer.end(), block_type(0));
+        return *this;
+    }
+
+    // Clears bit `pos`. Precondition (unchecked): pos < size().
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::reset(size_type pos) -> self_type&
+    {
+        m_buffer[block_index(pos)] &= ~bit_mask(pos);
+        return *this;
+    }
+
+    // Inverts every bit, then re-clears the unused bits of the last block.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::flip() -> self_type&
+    {
+        size_type size = block_count();
+        for (size_type i = 0; i < size; ++i)
+        {
+            m_buffer[i] = ~m_buffer[i];
+        }
+        zero_unused_bits();
+        return *this;
+    }
+
+    // Inverts bit `pos`. Precondition (unchecked): pos < size().
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::flip(size_type pos) -> self_type&
+    {
+        m_buffer[block_index(pos)] ^= bit_mask(pos);
+        return *this;
+    }
+
+    // Checks whether every bit is set; an empty bitset reports true.
+    template <class B>
+    inline bool xdynamic_bitset_base<B>::all() const noexcept
+    {
+        if (empty())
+            return true;
+
+        size_type extra_bits = count_extra_bits();
+        constexpr block_type all_ones = ~block_type(0);
+
+        // Full blocks must be all ones; a trailing partial block is compared
+        // against a mask covering only its valid low bits.
+        size_type size = extra_bits != 0 ? block_count() - 1 : block_count();
+        for (size_type i = 0; i < size; ++i)
+        {
+            if (m_buffer[i] != all_ones)
+                return false;
+        }
+
+        if (extra_bits != 0)
+        {
+            block_type mask = ~(~block_type(0) << extra_bits);
+            if (m_buffer.back() != mask)
+                return false;
+        }
+
+        return true;
+    }
+
+    // Checks whether at least one bit is set. Scanning raw blocks is safe
+    // because unused bits of the last block are kept zero.
+    template <class B>
+    inline bool xdynamic_bitset_base<B>::any() const noexcept
+    {
+        size_type size = block_count();
+        for (size_type i = 0; i < size; ++i)
+        {
+            if (m_buffer[i])
+                return true;
+        }
+        return false;
+    }
+
+    // Checks whether no bit is set.
+    template <class B>
+    inline bool xdynamic_bitset_base<B>::none() const noexcept
+    {
+        return !any();
+    }
+
+    // Returns the number of set bits, using a byte-wise popcount lookup table
+    // over the raw block storage. Correct because unused bits of the last
+    // block are kept zero.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::count() const noexcept -> size_type
+    {
+        // table[v] == popcount of the byte value v (256 entries).
+        static constexpr unsigned char table[] =
+        {
+            0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+            1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+            1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+            1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+            2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+            3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+        };
+        size_type res = 0;
+        // Use data() rather than &m_buffer[0]: indexing an empty buffer is
+        // undefined behavior, while data() is well-defined (possibly null)
+        // on an empty container — and an empty bitset has an empty buffer.
+        const unsigned char* p = static_cast<const unsigned char*>(static_cast<const void*>(m_buffer.data()));
+        size_type length = m_buffer.size() * sizeof(block_type);
+        for (size_type i = 0; i < length; ++i, ++p)
+        {
+            res += table[*p];
+        }
+        return res;
+    }
+
+    // Number of storage blocks currently held for the bits.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::block_count() const noexcept -> size_type
+    {
+        return m_buffer.size();
+    }
+
+    // Direct access to the underlying block storage.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::data() noexcept -> block_type*
+    {
+        return m_buffer.data();
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::data() const noexcept -> const block_type*
+    {
+        return m_buffer.data();
+    }
+
+    // Equality: identical logical size and identical block contents.
+    // Comparing raw blocks is safe because the unused bits of the last block
+    // are kept zero on both sides.
+    template <class B>
+    template <class Y>
+    inline bool xdynamic_bitset_base<B>::operator==(const xdynamic_bitset_base<Y>& rhs) const noexcept
+    {
+        bool is_equal = m_size == rhs.m_size;
+        if (!is_equal) { return false; }
+
+        // we know that block type of lhs & rhs is the same
+        auto n_blocks = block_count();
+
+        for (std::size_t i = 0; i < n_blocks; ++i)
+        {
+            if (m_buffer[i] != rhs.m_buffer[i])
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    template <class B>
+    template <class Y>
+    inline bool xdynamic_bitset_base<B>::operator!=(const xdynamic_bitset_base<Y>& rhs) const noexcept
+    {
+        return !(*this == rhs);
+    }
+
+    // Downcast from this CRTP-style base to the concrete derived bitset type.
+    // NOTE(review): this uses reinterpret_cast; if derived_class really
+    // inherits from xdynamic_bitset_base<B>, a static_cast would be the
+    // checked, well-defined downcast — worth confirming against the class
+    // declarations earlier in this header.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::derived_cast() -> derived_class&
+    {
+        return *(reinterpret_cast<derived_class*>(this));
+    }
+
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::derived_cast() const -> const derived_class&
+    {
+        return *(reinterpret_cast<const derived_class*>(this));
+    }
+
+    // Constructs the base from an existing block buffer and a bit count.
+    // NOTE(review): members are initialized in declaration order, not in the
+    // order they appear in this initializer list — harmless here since the
+    // two initializers are independent.
+    template <class B>
+    inline xdynamic_bitset_base<B>::xdynamic_bitset_base(const storage_type& buffer, std::size_t size)
+        : m_size(size), m_buffer(buffer)
+    {
+    }
+
+    // Number of blocks needed to hold `bits_count` bits (ceiling division).
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::compute_block_count(size_type bits_count) const noexcept -> size_type
+    {
+        return bits_count / s_bits_per_block
+            + static_cast<size_type>(bits_count % s_bits_per_block != 0);
+    }
+
+    // Index of the storage block containing bit `pos`.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::block_index(size_type pos) const noexcept -> size_type
+    {
+        return pos / s_bits_per_block;
+    }
+
+    // Position of bit `pos` within its storage block.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::bit_index(size_type pos) const noexcept -> size_type
+    {
+        return pos % s_bits_per_block;
+    }
+
+    // Single-bit mask selecting bit `pos` within its storage block.
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::bit_mask(size_type pos) const noexcept -> block_type
+    {
+        return block_type(1) << bit_index(pos);
+    }
+
+    // Number of bits used in the last, possibly partial, block
+    // (0 when size() is an exact multiple of the block width).
+    template <class B>
+    inline auto xdynamic_bitset_base<B>::count_extra_bits() const noexcept -> size_type
+    {
+        return bit_index(size());
+    }
+
+    // Clears the bits of the last block beyond the logical size, maintaining
+    // the invariant (relied upon by any(), count(), operator==, ...) that
+    // unused bits are always zero.
+    template <class B>
+    inline void xdynamic_bitset_base<B>::zero_unused_bits()
+    {
+        size_type extra_bits = count_extra_bits();
+        if (extra_bits != 0)
+        {
+            m_buffer.back() &= ~(~block_type(0) << extra_bits);
+        }
+    }
+
+    // Free bitwise operators returning a new bitset. The result type
+    // (temporary_type) is determined by the left-hand operand; each operator
+    // copies the lhs and applies the corresponding compound operator.
+    template <class B>
+    inline auto operator~(const xdynamic_bitset_base<B>& lhs)
+    {
+        using temporary_type = typename xdynamic_bitset_base<B>::temporary_type;
+        temporary_type res(lhs.derived_cast());
+        res.flip();
+        return res;
+    }
+
+    template <class L, class R>
+    inline auto operator&(const xdynamic_bitset_base<L>& lhs, const xdynamic_bitset_base<R>& rhs)
+    {
+        using temporary_type = typename xdynamic_bitset_base<L>::temporary_type;
+        temporary_type res(lhs.derived_cast());
+        res &= rhs;
+        return res;
+    }
+
+    template <class L, class R>
+    inline auto operator|(const xdynamic_bitset_base<L>& lhs, const xdynamic_bitset_base<R>& rhs)
+    {
+        using temporary_type = typename xdynamic_bitset_base<L>::temporary_type;
+        temporary_type res(lhs.derived_cast());
+        res |= rhs;
+        return res;
+    }
+
+    template <class L, class R>
+    inline auto operator^(const xdynamic_bitset_base<L>& lhs, const xdynamic_bitset_base<R>& rhs)
+    {
+        using temporary_type = typename xdynamic_bitset_base<L>::temporary_type;
+        temporary_type res(lhs.derived_cast());
+        res ^= rhs;
+        return res;
+    }
+
+    // Free-function swap, found via ADL; forwards to the member swap.
+    // The parameters must be non-const: the member swap mutates both sides,
+    // so the previous const-reference signature could never compile when
+    // instantiated (const members cannot call the non-const member swap).
+    template <class B>
+    inline void swap(xdynamic_bitset_base<B>& lhs, xdynamic_bitset_base<B>& rhs)
+    {
+        lhs.swap(rhs);
+    }
+
+    /************************************
+     * xbitset_reference implementation *
+     ************************************/
+
+    // Proxy reference to a single bit: stores (a reference to) the enclosing
+    // block and a one-bit mask selecting the referenced position.
+    template <class B, bool C>
+    inline xbitset_reference<B, C>::xbitset_reference(closure_type block, block_type pos)
+        : m_block(block), m_mask(block_type(1) << pos)
+    {
+    }
+
+    // Reads the referenced bit.
+    template <class B, bool C>
+    inline xbitset_reference<B, C>::operator bool() const noexcept
+    {
+        return (m_block & m_mask) != 0;
+    }
+
+    // Assignment writes through to the referenced bit; it never rebinds the
+    // proxy to a different bit.
+    template <class B, bool C>
+    inline auto xbitset_reference<B, C>::operator=(const self_type& rhs) noexcept -> self_type&
+    {
+        assign(rhs);
+        return *this;
+    }
+
+    template <class B, bool C>
+    inline auto xbitset_reference<B, C>::operator=(self_type&& rhs) noexcept -> self_type&
+    {
+        assign(rhs);
+        return *this;
+    }
+
+    template <class B, bool C>
+    inline auto xbitset_reference<B, C>::operator=(bool rhs) noexcept -> self_type&
+    {
+        assign(rhs);
+        return *this;
+    }
+
+    // Logical negation of the referenced bit (does not modify it).
+    template <class B, bool C>
+    inline bool xbitset_reference<B, C>::operator~() const noexcept
+    {
+        return (m_block & m_mask) == 0;
+    }
+
+    // Compound AND: only a false rhs can change the bit (clears it).
+    template <class B, bool C>
+    inline auto xbitset_reference<B, C>::operator&=(bool rhs) noexcept -> self_type&
+    {
+        if (!rhs)
+        {
+            reset();
+        }
+        return *this;
+    }
+
+    // Compound OR: only a true rhs can change the bit (sets it).
+    template <class B, bool C>
+    inline auto xbitset_reference<B, C>::operator|=(bool rhs) noexcept -> self_type&
+    {
+        if (rhs)
+        {
+            set();
+        }
+        return *this;
+    }
+
+    // Compound XOR: a true rhs toggles the bit, a false rhs is a no-op.
+    template <class B, bool C>
+    inline auto xbitset_reference<B, C>::operator^=(bool rhs) noexcept -> self_type&
+    {
+        return rhs ? flip() : *this;
+    }
+
+    // Toggles the referenced bit.
+    template <class B, bool C>
+    inline auto xbitset_reference<B, C>::flip() noexcept -> self_type&
+    {
+        m_block ^= m_mask;
+        return *this;
+    }
+
+    // Address-of yields a proxy pointer wrapping this proxy reference.
+    template <class B, bool C>
+    inline auto xbitset_reference<B, C>::operator&() noexcept -> pointer
+    {
+        return pointer(*this);
+    }
+
+    // Writes `rhs` into the referenced bit.
+    template <class B, bool C>
+    inline void xbitset_reference<B, C>::assign(bool rhs) noexcept
+    {
+        rhs ? set() : reset();
+    }
+
+    template <class B, bool C>
+    inline void xbitset_reference<B, C>::set() noexcept
+    {
+        m_block |= m_mask;
+    }
+
+    template <class B, bool C>
+    inline void xbitset_reference<B, C>::reset() noexcept
+    {
+        m_block &= ~m_mask;
+    }
+
+    /***********************************
+     * xbitset_iterator implementation *
+     ***********************************/
+
+    // Random-access proxy iterator over the bits of a bitset: stores a
+    // pointer to the container and the current bit index.
+    template <class B, bool C>
+    inline xbitset_iterator<B, C>::xbitset_iterator() noexcept
+        : p_container(nullptr), m_index(0)
+    {
+    }
+
+    template <class B, bool C>
+    inline xbitset_iterator<B, C>::xbitset_iterator(container_reference c, size_type index) noexcept
+        : p_container(&c), m_index(index)
+    {
+    }
+
+    template <class B, bool C>
+    inline auto xbitset_iterator<B, C>::operator++() -> self_type&
+    {
+        ++m_index;
+        return *this;
+    }
+
+    template <class B, bool C>
+    inline auto xbitset_iterator<B, C>::operator--() -> self_type&
+    {
+        --m_index;
+        return *this;
+    }
+
+    // Advances by n positions; the arithmetic is done in signed space so a
+    // negative n moves backward.
+    template <class B, bool C>
+    inline auto xbitset_iterator<B, C>::operator+=(difference_type n) -> self_type&
+    {
+        difference_type res = static_cast<difference_type>(m_index) + n;
+        m_index = static_cast<size_type>(res);
+        return *this;
+    }
+
+    template <class B, bool C>
+    inline auto xbitset_iterator<B, C>::operator-=(difference_type n) -> self_type&
+    {
+        difference_type res = static_cast<difference_type>(m_index) - n;
+        m_index = static_cast<size_type>(res);
+        return *this;
+    }
+
+    // Distance between two iterators into the same container.
+    template <class B, bool C>
+    inline auto xbitset_iterator<B, C>::operator-(const self_type& rhs) const -> difference_type
+    {
+        return difference_type(m_index - rhs.m_index);
+    }
+
+    // Dereference yields the container's proxy reference for the current bit.
+    template <class B, bool C>
+    inline auto xbitset_iterator<B, C>::operator*() const -> reference
+    {
+        return (*p_container)[m_index];
+    }
+
+    template <class B, bool C>
+    inline auto xbitset_iterator<B, C>::operator->() const -> pointer
+    {
+        return &(operator*());
+    }
+
+    // Iterators compare equal only when they refer to the same container and
+    // the same bit index.
+    template <class B, bool C>
+    inline bool xbitset_iterator<B, C>::operator==(const self_type& rhs) const
+    {
+        return p_container == rhs.p_container && m_index == rhs.m_index;
+    }
+
+    // Ordering is only meaningful within one container; iterators into
+    // different containers always compare false.
+    template <class B, bool C>
+    inline bool xbitset_iterator<B, C>::operator<(const self_type& rhs) const
+    {
+        return p_container == rhs.p_container && m_index < rhs.m_index;
+    }
+}
+
+#endif

+ 44 - 0
3rd/numpy/include/xtl/xfunctional.hpp

@@ -0,0 +1,44 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_FUNCTIONAL_HPP
+#define XTL_FUNCTIONAL_HPP
+
+#include <utility>
+
+#include "xtl_config.hpp"
+#include "xtype_traits.hpp"
+
+namespace xtl
+{
+    /***************************
+     * identity implementation *
+     ***************************/
+
+    // Function object that returns its argument unchanged, perfectly
+    // forwarded (comparable to C++20 std::identity).
+    struct identity
+    {
+        template <class T>
+        T&& operator()(T&& x) const
+        {
+            // NOTE(review): when invoked with an rvalue, the returned T&&
+            // refers to the caller's temporary — the result must not outlive
+            // the full expression it was produced in.
+            return std::forward<T>(x);
+        }
+    };
+
+    /*************************
+     * select implementation *
+     *************************/
+
+    // Ternary select for scalar operands: returns v1 when cond is truthy,
+    // v2 otherwise. Restricted to scalar types via the all_scalar constraint.
+    template <class B, class T1, class T2, XTL_REQUIRES(all_scalar<B, T1, T2>)>
+    inline std::common_type_t<T1, T2> select(const B& cond, const T1& v1, const T2& v2) noexcept
+    {
+        return cond ? v1 : v2;
+    }
+}
+
+#endif

+ 41 - 0
3rd/numpy/include/xtl/xhalf_float.hpp

@@ -0,0 +1,41 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XHALF_FLOAT_HPP
+#define XTL_XHALF_FLOAT_HPP
+
+#include "xtype_traits.hpp"
+#include "xhalf_float_impl.hpp"
+
+namespace xtl
+{
+    // Convenience alias: xtl code refers to the vendored half-precision type
+    // without spelling out the half_float namespace.
+    using half_float = half_float::half;
+
+    // Register half_float with xtl's type-trait extension points so it is
+    // treated like a built-in signed floating-point scalar by xtl machinery.
+    template <>
+    struct is_scalar<half_float> : std::true_type
+    {
+    };
+
+    template <>
+    struct is_arithmetic<half_float> : std::true_type
+    {
+    };
+
+    template <>
+    struct is_signed<half_float> : std::true_type
+    {
+    };
+
+    template <>
+    struct is_floating_point<half_float> : std::true_type
+    {
+    };
+}
+
+#endif

+ 4036 - 0
3rd/numpy/include/xtl/xhalf_float_impl.hpp

@@ -0,0 +1,4036 @@
+// half - IEEE 754-based half-precision floating-point library.
+//
+// Copyright (c) 2012-2019 Christian Rau <rauy@users.sourceforge.net>
+// Copyright (c) 2020 0xBYTESHIFT
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation 
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the 
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+/// \file
+/// Main header file for half-precision functionality.
+
+#pragma once
+
+#define HALF_TWOS_COMPLEMENT_INT 1
+
+// any error throwing C++ exceptions?
+#if defined(HALF_ERRHANDLING_THROW_INVALID) || defined(HALF_ERRHANDLING_THROW_DIVBYZERO) || defined(HALF_ERRHANDLING_THROW_OVERFLOW) || defined(HALF_ERRHANDLING_THROW_UNDERFLOW) || defined(HALF_ERRHANDLING_THROW_INEXACT)
+#define HALF_ERRHANDLING_THROWS 1
+#endif
+
+// any error handling enabled?
+#define HALF_ERRHANDLING	(HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS)
+
+#if HALF_ERRHANDLING
+	#define HALF_UNUSED_NOERR(name) name
+#else
+	#define HALF_UNUSED_NOERR(name)
+#endif
+
+// support constexpr
+#if HALF_ERRHANDLING
+	#define constexpr_NOERR
+#else
+	#define constexpr_NOERR	constexpr
+#endif
+
+#include <utility>
+#include <algorithm>
+#include <istream>
+#include <ostream>
+#include <limits>
+#include <stdexcept>
+#include <climits>
+#include <cmath>
+#include <cstring>
+#include <cstdlib>
+#include <type_traits>
+#include <cstdint>
+#if HALF_ERRHANDLING_ERRNO
+	#include <cerrno>
+#endif
+#include <cfenv>
+#include <functional>
+
+#ifndef HALF_ENABLE_F16C_INTRINSICS
+	/// Enable F16C intruction set intrinsics.
+	/// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting between 
+	/// half-precision and single-precision values which may result in improved performance. This will not perform additional checks 
+	/// for support of the F16C instruction set, so an appropriate target platform is required when enabling this feature.
+	///
+	/// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on supporting platforms.
+	#define HALF_ENABLE_F16C_INTRINSICS __F16C__
+#endif
+
+#if HALF_ENABLE_F16C_INTRINSICS
+	#include <immintrin.h>
+#endif
+
+#ifndef HALF_ERRHANDLING_OVERFLOW_TO_INEXACT
+/// Raise INEXACT exception on overflow.
+/// Defining this to 1 (default) causes overflow errors to automatically raise inexact exceptions in addition.
+/// These will be raised after any possible handling of the underflow exception.
+#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT	1
+#endif
+
+#ifndef HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
+/// Raise INEXACT exception on underflow.
+/// Defining this to 1 (default) causes underflow errors to automatically raise inexact exceptions in addition.
+/// These will be raised after any possible handling of the underflow exception.
+///
+/// **Note:** This will actually cause underflow (and the accompanying inexact) exceptions to be raised *only* when the result 
+/// is inexact, while if disabled bare underflow errors will be raised for *any* (possibly exact) subnormal result.
+#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT	1
+#endif
+
+/// Default rounding mode.
+/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and more precise types 
+/// (unless using half_cast() and specifying the rounding mode directly) as well as in arithmetic operations and mathematical 
+/// functions. It can be redefined (before including half.hpp) to one of the standard rounding modes using their respective 
+/// constants or the equivalent values of 
+/// [std::float_round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/float_round_style):
+///
+/// `std::float_round_style`         | value | rounding
+/// ---------------------------------|-------|-------------------------
+/// `std::round_indeterminate`       | -1    | fastest
+/// `std::round_toward_zero`         | 0     | toward zero
+/// `std::round_to_nearest`          | 1     | to nearest (default)
+/// `std::round_toward_infinity`     | 2     | toward positive infinity
+/// `std::round_toward_neg_infinity` | 3     | toward negative infinity
+///
+/// By default this is set to `1` (`std::round_to_nearest`), which rounds results to the nearest representable value. It can even 
+/// be set to [std::numeric_limits<float>::round_style](https://en.cppreference.com/w/cpp/types/numeric_limits/round_style) to synchronize 
+/// the rounding mode with that of the built-in single-precision implementation (which is likely `std::round_to_nearest`, though).
+#ifndef HALF_ROUND_STYLE
+	#define HALF_ROUND_STYLE	1		// = std::round_to_nearest
+#endif
+
+/// Value signaling overflow.
+/// In correspondence with `HUGE_VAL[F|L]` from `<cmath>` this symbol expands to a positive value signaling the overflow of an 
+/// operation, in particular it just evaluates to positive infinity.
+///
+/// **See also:** Documentation for [HUGE_VAL](https://en.cppreference.com/w/cpp/numeric/math/HUGE_VAL)
+#define HUGE_VALH	std::numeric_limits<half_float::half>::infinity()
+
+/// Fast half-precision fma function.
+/// This symbol is defined if the fma() function generally executes as fast as, or faster than, a separate 
+/// half-precision multiplication followed by an addition, which is always the case.
+///
+/// **See also:** Documentation for [FP_FAST_FMA](https://en.cppreference.com/w/cpp/numeric/math/fma)
+#define FP_FAST_FMAH	1
+
+///	Half rounding mode.
+/// In correspondence with `FLT_ROUNDS` from `<cfloat>` this symbol expands to the rounding mode used for 
+/// half-precision operations. It is an alias for [HALF_ROUND_STYLE](\ref HALF_ROUND_STYLE).
+///
+/// **See also:** Documentation for [FLT_ROUNDS](https://en.cppreference.com/w/cpp/types/climits/FLT_ROUNDS)
+#define HLF_ROUNDS	HALF_ROUND_STYLE
+
+#ifndef FP_ILOGB0
+	#define FP_ILOGB0		INT_MIN
+#endif
+#ifndef FP_ILOGBNAN
+	#define FP_ILOGBNAN		INT_MAX
+#endif
+#ifndef FP_SUBNORMAL
+	#define FP_SUBNORMAL	0
+#endif
+#ifndef FP_ZERO
+	#define FP_ZERO			1
+#endif
+#ifndef FP_NAN
+	#define FP_NAN			2
+#endif
+#ifndef FP_INFINITE
+	#define FP_INFINITE		3
+#endif
+#ifndef FP_NORMAL
+	#define FP_NORMAL		4
+#endif
+
+#if !defined(FE_ALL_EXCEPT)
+	#define FE_INVALID		0x10
+	#define FE_DIVBYZERO	0x08
+	#define FE_OVERFLOW		0x04
+	#define FE_UNDERFLOW	0x02
+	#define FE_INEXACT		0x01
+	#define FE_ALL_EXCEPT	(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT)
+#endif
+
+
+/// Main namespace for half-precision functionality.
+/// This namespace contains all the functionality provided by the library.
+namespace half_float {
+	class half;
+
+	/// Library-defined half-precision literals.
+	/// Import this namespace to enable half-precision floating-point literals:
+	/// ~~~~{.cpp}
+	/// using namespace half_float::literal;
+	/// half_float::half = 4.2_h;
+	/// ~~~~
+	namespace literal {
+		half operator "" _h(long double);
+	}
+
+	/// \internal
+	/// \brief Implementation details.
+	namespace detail {
+		/// Conditional type.
+		template<bool B, class T, class F> struct conditional : std::conditional<B,T,F> {};
+
+		/// Helper for tag dispatching.
+		template<bool B> struct bool_type : std::integral_constant<bool,B> {};
+		using std::true_type;
+		using std::false_type;
+
+		/// Type traits for floating-point types.
+		template<class T> struct is_float : std::is_floating_point<T> {};
+
+		/// Type traits for floating-point bits.
+		/// Defaults to unsigned char; specialized below for float and double.
+		/// cv-qualified types map to the traits of the unqualified type.
+		template<class T> struct bits { using type = unsigned char; };
+		template<class T> struct bits<const T> : bits<T> {};
+		template<class T> struct bits<volatile T> : bits<T> {};
+		template<class T> struct bits<const volatile T> : bits<T> {};
+
+		/// Unsigned integer of (at least) 16 bits width.
+		using uint16 = std::uint_least16_t;
+
+		/// Fastest unsigned integer of (at least) 32 bits width.
+		using uint32 = std::uint_fast32_t;
+
+		/// Fastest signed integer of (at least) 32 bits width.
+		using int32 = std::int_fast32_t;
+
+		/// Unsigned integer of (at least) 32 bits width.
+		template<> struct bits<float> { using type = std::uint_least32_t; };
+
+		/// Unsigned integer of (at least) 64 bits width.
+		template<> struct bits<double> { using type = std::uint_least64_t; };
+		template<class T> using bits_t = typename bits<T>::type;
+
+	#ifdef HALF_ARITHMETIC_TYPE
+		/// Type to use for arithmetic computations and mathematic functions internally.
+		typedef HALF_ARITHMETIC_TYPE internal_t;
+	#endif
+
+		/// Tag type for binary construction.
+		struct binary_t {};
+
+		/// Tag for binary construction.
+		constexpr binary_t binary = binary_t();
+
+		/// \name Implementation defined classification and arithmetic
+		/// \{
+
+		/// Check for infinity.
+		/// \tparam T argument type (builtin floating-point type)
+		/// \param arg value to query
+		/// \retval true if infinity
+		/// \retval false else
+		template<class T> bool builtin_isinf(T arg) { return std::isinf(arg); }
+
+		/// Check for NaN.
+		/// \tparam T argument type (builtin floating-point type)
+		/// \param arg value to query
+		/// \retval true if not a number
+		/// \retval false else
+		template<class T> bool builtin_isnan(T arg) { return std::isnan(arg); }
+
+		/// Check sign.
+		/// \tparam T argument type (builtin floating-point type)
+		/// \param arg value to query
+		/// \retval true if signbit set
+		/// \retval false else
+		template<class T> bool builtin_signbit(T arg) { return std::signbit(arg); }
+
+		/// Platform-independent sign mask.
+		/// \param arg integer value in two's complement
+		/// \retval -1 if \a arg negative
+		/// \retval 0 if \a arg positive
+		inline uint32 sign_mask(uint32 arg) {
+			static const int N = std::numeric_limits<uint32>::digits - 1;
+		#if HALF_TWOS_COMPLEMENT_INT
+			// arithmetic right shift smears the sign bit across the whole word
+			return static_cast<int32>(arg) >> N;
+		#else
+			// portable fallback: negate the isolated top bit (0 -> 0, 1 -> all ones)
+			return -((arg>>N)&1);
+		#endif
+		}
+
+		/// Platform-independent arithmetic right shift.
+		/// \param arg integer value in two's complement
+		/// \param i shift amount (at most 31)
+		/// \return \a arg right shifted for \a i bits with possible sign extension
+		inline uint32 arithmetic_shift(uint32 arg, int i) {
+		#if HALF_TWOS_COMPLEMENT_INT
+			return static_cast<int32>(arg) >> i;
+		#else
+			// NOTE(review): emulates the arithmetic shift via truncating division
+			// plus a -1 correction for negative inputs; presumably matches the
+			// two's complement branch for all inputs used here — confirm.
+			return static_cast<int32>(arg)/(static_cast<int32>(1)<<i) - ((arg>>(std::numeric_limits<uint32>::digits-1))&1);
+		#endif
+		}
+
+		/// \}
+		/// \name Error handling
+		/// \{
+
+		/// Internal exception flags.
+		/// \return reference to global exception flags
+		inline int& errflags() { thread_local int flags = 0; return flags; }
+
+		/// Raise floating-point exception.
+		/// \param flags exceptions to raise
+		/// \param cond condition to raise exceptions for
+		inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true) {
+		#if HALF_ERRHANDLING
+			if(!cond)
+				return;
+		#if HALF_ERRHANDLING_FLAGS
+			errflags() |= flags;
+		#endif
+		#if HALF_ERRHANDLING_ERRNO
+			if(flags & FE_INVALID)
+				errno = EDOM;
+			else if(flags & (FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW))
+				errno = ERANGE;
+		#endif
+		#if HALF_ERRHANDLING_FENV
+			std::feraiseexcept(flags);
+		#endif
+		#ifdef HALF_ERRHANDLING_THROW_INVALID
+			if(flags & FE_INVALID)
+				throw std::domain_error(HALF_ERRHANDLING_THROW_INVALID);
+		#endif
+		#ifdef HALF_ERRHANDLING_THROW_DIVBYZERO
+			if(flags & FE_DIVBYZERO)
+				throw std::domain_error(HALF_ERRHANDLING_THROW_DIVBYZERO);
+		#endif
+		#ifdef HALF_ERRHANDLING_THROW_OVERFLOW
+			if(flags & FE_OVERFLOW)
+				throw std::overflow_error(HALF_ERRHANDLING_THROW_OVERFLOW);
+		#endif
+		#ifdef HALF_ERRHANDLING_THROW_UNDERFLOW
+			if(flags & FE_UNDERFLOW)
+				throw std::underflow_error(HALF_ERRHANDLING_THROW_UNDERFLOW);
+		#endif
+		#ifdef HALF_ERRHANDLING_THROW_INEXACT
+			if(flags & FE_INEXACT)
+				throw std::range_error(HALF_ERRHANDLING_THROW_INEXACT);
+		#endif
+		#if HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
+			if((flags & FE_UNDERFLOW) && !(flags & FE_INEXACT))
+				raise(FE_INEXACT);
+		#endif
+		#if HALF_ERRHANDLING_OVERFLOW_TO_INEXACT
+			if((flags & FE_OVERFLOW) && !(flags & FE_INEXACT))
+				raise(FE_INEXACT);
+		#endif
+		#endif
+		}
+
+		// Bit-pattern conventions used throughout (IEEE 754 binary16):
+		//   x & 0x7FFF          strips the sign bit;
+		//   (x&0x7FFF) > 0x7C00 means NaN (exponent all ones, mantissa != 0);
+		//   0x200               is the most significant mantissa bit, used as
+		//                       the quiet-NaN flag (clear => signaling NaN).
+
+		/// Check and signal for any NaN.
+		/// \param x first half-precision value to check
+		/// \param y second half-precision value to check
+		/// \retval true if either \a x or \a y is NaN
+		/// \retval false else
+		/// \exception FE_INVALID if \a x or \a y is NaN
+		inline constexpr_NOERR bool compsignal(unsigned int x, unsigned int y) {
+		#if HALF_ERRHANDLING
+			raise(FE_INVALID, (x&0x7FFF)>0x7C00 || (y&0x7FFF)>0x7C00);
+		#endif
+			return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00;
+		}
+
+		/// Signal and silence signaling NaN.
+		/// \param nan half-precision NaN value
+		/// \return quiet NaN
+		/// \exception FE_INVALID if \a nan is signaling NaN
+		inline constexpr_NOERR unsigned int signal(unsigned int nan) {
+		#if HALF_ERRHANDLING
+			raise(FE_INVALID, !(nan&0x200));
+		#endif
+			// setting the quiet bit turns a signaling NaN into a quiet one
+			return nan | 0x200;
+		}
+
+		/// Signal and silence signaling NaNs.
+		/// Returns \a x quieted if it is a NaN, otherwise \a y quieted.
+		/// \param x first half-precision value to check
+		/// \param y second half-precision value to check
+		/// \return quiet NaN
+		/// \exception FE_INVALID if \a x or \a y is signaling NaN
+		inline constexpr_NOERR unsigned int signal(unsigned int x, unsigned int y) {
+		#if HALF_ERRHANDLING
+			raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)));
+		#endif
+			return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200);
+		}
+
+		/// Signal and silence signaling NaNs.
+		/// Returns the first NaN among \a x, \a y, \a z, quieted.
+		/// \param x first half-precision value to check
+		/// \param y second half-precision value to check
+		/// \param z third half-precision value to check
+		/// \return quiet NaN
+		/// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN
+		inline constexpr_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z) {
+		#if HALF_ERRHANDLING
+			raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)) || ((z&0x7FFF)>0x7C00 && !(z&0x200)));
+		#endif
+			return ((x&0x7FFF)>0x7C00) ? (x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200);
+		}
+
+		/// Select value or signaling NaN.
+		/// \param x preferred half-precision value
+		/// \param y ignored half-precision value except for signaling NaN
+		/// \return \a y if signaling NaN, \a x otherwise
+		/// \exception FE_INVALID if \a y is signaling NaN
+		inline constexpr_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) {
+		#if HALF_ERRHANDLING
+			// NaN (abs > 0x7C00) with quiet bit 0x200 clear => signaling NaN
+			return (((y&0x7FFF)>0x7C00) && !(y&0x200)) ? signal(y) : x;
+		#else
+			return x;
+		#endif
+		}
+
+		/// Raise domain error and return NaN.
+		/// return quiet NaN
+		/// \exception FE_INVALID
+		inline constexpr_NOERR unsigned int invalid() {
+		#if HALF_ERRHANDLING
+			raise(FE_INVALID);
+		#endif
+			// 0x7FFF: positive quiet NaN with all mantissa bits set
+			return 0x7FFF;
+		}
+
+		/// Raise pole error and return infinity.
+		/// \param sign half-precision value with sign bit only
+		/// \return half-precision infinity with sign of \a sign
+		/// \exception FE_DIVBYZERO
+		inline constexpr_NOERR unsigned int pole(unsigned int sign = 0) {
+		#if HALF_ERRHANDLING
+			raise(FE_DIVBYZERO);
+		#endif
+			// 0x7C00 is the infinity bit pattern
+			return sign | 0x7C00;
+		}
+
+		/// Check value for underflow.
+		/// \param arg non-zero half-precision value to check
+		/// \return \a arg
+		/// \exception FE_UNDERFLOW if arg is subnormal
+		inline constexpr_NOERR unsigned int check_underflow(unsigned int arg) {
+		#if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
+			// a zero exponent field (arg & 0x7C00 == 0) marks a subnormal
+			raise(FE_UNDERFLOW, !(arg&0x7C00));
+		#endif
+			return arg;
+		}
+
+		/// \}
+		/// \name Conversion and rounding
+		/// \{
+
+		/// Half-precision overflow.
+		/// \tparam R rounding mode to use
+		/// \param sign half-precision value with sign bit only
+		/// \return rounded overflowing half-precision value
+		/// \exception FE_OVERFLOW
+		template<std::float_round_style R> constexpr_NOERR unsigned int overflow(unsigned int sign = 0) {
+		#if HALF_ERRHANDLING
+			raise(FE_OVERFLOW);
+		#endif
+			return	(R==std::round_toward_infinity) ? (sign+0x7C00-(sign>>15)) :
+					(R==std::round_toward_neg_infinity) ? (sign+0x7BFF+(sign>>15)) :
+					(R==std::round_toward_zero) ? (sign|0x7BFF) :
+					(sign|0x7C00);
+		}
+
+		/// Half-precision underflow.
+		/// \tparam R rounding mode to use
+		/// \param sign half-precision value with sign bit only
+		/// \return rounded underflowing half-precision value
+		/// \exception FE_UNDERFLOW
+		template<std::float_round_style R> constexpr_NOERR unsigned int underflow(unsigned int sign = 0) {
+		#if HALF_ERRHANDLING
+			raise(FE_UNDERFLOW);
+		#endif
+			return	(R==std::round_toward_infinity) ? (sign+1-(sign>>15)) :
+					(R==std::round_toward_neg_infinity) ? (sign+(sign>>15)) :
+					sign;
+		}
+
+		/// Round half-precision number.
+		/// \tparam R rounding mode to use
+		/// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results
+		/// \param value finite half-precision number to round
+		/// \param g guard bit (most significant discarded bit)
+		/// \param s sticky bit (or of all but the most significant discarded bits)
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded or \a I is `true`
+		template<std::float_round_style R,bool I> constexpr_NOERR unsigned int rounded(unsigned int value, int g, int s) {
+		#if HALF_ERRHANDLING
+			// round-to-nearest-even: increment iff guard set and (sticky or LSB set);
+			// directed modes increment on any discarded bit in the rounding direction.
+			// A mantissa carry naturally propagates into the exponent field.
+			value +=	(R==std::round_to_nearest) ? (g&(s|value)) :
+						(R==std::round_toward_infinity) ? (~(value>>15)&(g|s)) :
+						(R==std::round_toward_neg_infinity) ? ((value>>15)&(g|s)) : 0;
+			// classify the rounded result: exponent all ones => overflowed to
+			// infinity; non-zero exponent => normal (inexact if bits discarded);
+			// zero exponent => subnormal/zero => underflow
+			if((value&0x7C00) == 0x7C00)
+				raise(FE_OVERFLOW);
+			else if(value & 0x7C00)
+				raise(FE_INEXACT, I || (g|s)!=0);
+			else
+				raise(FE_UNDERFLOW, !(HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT) || I || (g|s)!=0);
+			return value;
+		#else
+			return	(R==std::round_to_nearest) ? (value+(g&(s|value))) :
+					(R==std::round_toward_infinity) ? (value+(~(value>>15)&(g|s))) :
+					(R==std::round_toward_neg_infinity) ? (value+((value>>15)&(g|s))) :
+					value;
+		#endif
+		}
+
+		/// Round half-precision number to nearest integer value.
+		/// \tparam R rounding mode to use
+		/// \tparam E `true` for round to even, `false` for round away from zero
+		/// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it
+		/// \param value half-precision value to round
+		/// \return half-precision bits for nearest integral value
+		/// \exception FE_INVALID for signaling NaN
+		/// \exception FE_INEXACT if value had to be rounded and \a I is `true`
+		template<std::float_round_style R,bool E,bool I> unsigned int integral(unsigned int value) {
+			unsigned int abs = value & 0x7FFF;
+			// |value| < 1.0 (0x3C00): result is 0 or +-1.0 depending on mode
+			if(abs < 0x3C00) {
+				raise(FE_INEXACT, I);
+				return ((R==std::round_to_nearest) ? (0x3C00&-static_cast<unsigned>(abs>=(0x3800+E))) :
+						(R==std::round_toward_infinity) ? (0x3C00&-(~(value>>15)&(abs!=0))) :
+						(R==std::round_toward_neg_infinity) ? (0x3C00&-static_cast<unsigned>(value>0x8000)) :
+						0) | (value&0x8000);
+			}
+			// |value| >= 2^10 (0x6400): every representable finite value is already
+			// integral; only signaling NaNs still need to be quieted
+			if(abs >= 0x6400)
+				return (abs>0x7C00) ? signal(value) : value;
+			// exp = number of fractional mantissa bits; mask selects them
+			unsigned int exp = 25 - (abs>>10), mask = (1<<exp) - 1;
+			raise(FE_INEXACT, I && (value&mask));
+			// add the mode-dependent rounding increment, then clear the fraction
+			return ((	(R==std::round_to_nearest) ? ((1<<(exp-1))-(~(value>>exp)&E)) :
+						(R==std::round_toward_infinity) ? (mask&((value>>15)-1)) :
+						(R==std::round_toward_neg_infinity) ? (mask&-(value>>15)) :
+						0) + value) & ~mask;
+		}
+
+		/// Convert fixed point to half-precision floating-point.
+		/// \tparam R rounding mode to use
+		/// \tparam F number of fractional bits (at least 11)
+		/// \tparam S `true` for signed, `false` for unsigned
+		/// \tparam N `true` for additional normalization step, `false` if already normalized to 1.F
+		/// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results
+		/// \param m mantissa in Q1.F fixed point format
+		/// \param exp exponent
+		/// \param sign half-precision value with sign bit only
+		/// \param s sticky bit (or of all but the most significant already discarded bits)
+		/// \return value converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded or \a I is `true`
+		template<std::float_round_style R,unsigned int F,bool S,bool N,bool I> unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0) {
+			if(S) {
+				// take the absolute value branch-free and remember the sign
+				uint32 msign = sign_mask(m);
+				m = (m^msign) - msign;
+				sign = msign & 0x8000;
+			}
+			if(N)
+				// normalize: shift the leading 1 up to bit F, decrementing the exponent
+				for(; m<(static_cast<uint32>(1)<<F) && exp; m<<=1,--exp) ;
+			else if(exp < 0)
+				// subnormal result: shift further right, folding discarded bits
+				// into guard (second argument) and sticky (third argument)
+				return rounded<R,I>(sign+(m>>(F-10-exp)), (m>>(F-11-exp))&1, s|((m&((static_cast<uint32>(1)<<(F-11-exp))-1))!=0));
+			// normal result: top 10 fractional bits become the mantissa
+			return rounded<R,I>(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast<uint32>(1)<<(F-11))-1))!=0));
+		}
+
+		/// Convert IEEE single-precision to half-precision.
+		/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
+		/// \tparam R rounding mode to use
+		/// \param value single-precision value to convert
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R> unsigned int float2half_impl(float value, true_type) {
+		#if HALF_ENABLE_F16C_INTRINSICS
+			return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value),
+				(R==std::round_to_nearest) ? _MM_FROUND_TO_NEAREST_INT :
+				(R==std::round_toward_zero) ? _MM_FROUND_TO_ZERO :
+				(R==std::round_toward_infinity) ? _MM_FROUND_TO_POS_INF :
+				(R==std::round_toward_neg_infinity) ? _MM_FROUND_TO_NEG_INF :
+				_MM_FROUND_CUR_DIRECTION));
+		#else
+			bits_t<float> fbits;
+			std::memcpy(&fbits, &value, sizeof(float));
+		// The first branch converts arithmetically; the #else branch keeps the
+		// original table-driven variant (disabled) for reference.
+		#if 1
+			// move the sign bit from position 31 to position 15
+			unsigned int sign = (fbits>>16) & 0x8000;
+			fbits &= 0x7FFFFFFF;
+			// >= 0x7F800000: infinity or NaN (quiet bit 0x200 set for NaNs)
+			if(fbits >= 0x7F800000)
+				return sign | 0x7C00 | ((fbits>0x7F800000) ? (0x200|((fbits>>13)&0x3FF)) : 0);
+			// >= 2^16: too large for half, overflow per rounding mode
+			if(fbits >= 0x47800000)
+				return overflow<R>(sign);
+			// >= 2^-14: normal half; rebias exponent (127-15=112), keep top 10
+			// mantissa bits, pass guard/sticky of the 13 discarded bits
+			if(fbits >= 0x38800000)
+				return rounded<R,false>(sign|(((fbits>>23)-112)<<10)|((fbits>>13)&0x3FF), (fbits>>12)&1, (fbits&0xFFF)!=0);
+			// >= 2^-25: representable as a (possibly rounded-up) subnormal
+			if(fbits >= 0x33000000)
+			{
+				int i = 125 - (fbits>>23);
+				// make the implicit leading bit explicit before shifting out
+				fbits = (fbits&0x7FFFFF) | 0x800000;
+				return rounded<R,false>(sign|(fbits>>(i+1)), (fbits>>i)&1, (fbits&((static_cast<uint32>(1)<<i)-1))!=0);
+			}
+			// non-zero but below the subnormal range: underflow per rounding mode
+			if(fbits != 0)
+				return underflow<R>(sign);
+			// signed zero
+			return sign;
+		#else
+			static const uint16 base_table[512] = {
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+				0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 
+				0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, 
+				0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 
+				0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7C00, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
+				0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 
+				0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 
+				0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 
+				0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFC00 };
+			static const unsigned char shift_table[256] = {
+				24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
+				25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
+				25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
+				25, 25, 25, 25, 25, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
+				13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
+				24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
+				24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
+				24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 };
+			int sexp = fbits >> 23, exp = sexp & 0xFF, i = shift_table[exp];
+			fbits &= 0x7FFFFF;
+			uint32 m = (fbits|((exp!=0)<<23)) & -static_cast<uint32>(exp!=0xFF);
+			return rounded<R,false>(base_table[sexp]+(fbits>>i), (m>>(i-1))&1, (((static_cast<uint32>(1)<<(i-1))-1)&m)!=0);
+		#endif
+		#endif
+		}
+
+		/// Convert IEEE double-precision to half-precision.
+		/// Works on the high 32 bits of the double (`hi`); the low word (`lo`)
+		/// only contributes to the sticky bit.
+		/// \tparam R rounding mode to use
+		/// \param value double-precision value to convert
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R> unsigned int float2half_impl(double value, true_type) {
+		#if HALF_ENABLE_F16C_INTRINSICS
+			if(R == std::round_indeterminate)
+				return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_cvtpd_ps(_mm_set_sd(value)), _MM_FROUND_CUR_DIRECTION));
+		#endif
+			bits_t<double> dbits;
+			std::memcpy(&dbits, &value, sizeof(double));
+			uint32 hi = dbits >> 32, lo = dbits & 0xFFFFFFFF;
+			unsigned int sign = (hi>>16) & 0x8000;
+			hi &= 0x7FFFFFFF;
+			// exponent all ones: infinity, or NaN if any of the 52 mantissa bits set
+			if(hi >= 0x7FF00000)
+				return sign | 0x7C00 | ((dbits&0xFFFFFFFFFFFFF) ? (0x200|((hi>>10)&0x3FF)) : 0);
+			// >= 2^16: overflow per rounding mode
+			if(hi >= 0x40F00000)
+				return overflow<R>(sign);
+			// >= 2^-14: normal half; rebias exponent (1023-15=1008)
+			if(hi >= 0x3F100000)
+				return rounded<R,false>(sign|(((hi>>20)-1008)<<10)|((hi>>10)&0x3FF), (hi>>9)&1, ((hi&0x1FF)|lo)!=0);
+			// >= 2^-25: subnormal half; make the implicit bit explicit and shift
+			if(hi >= 0x3E600000) {
+				int i = 1018 - (hi>>20);
+				hi = (hi&0xFFFFF) | 0x100000;
+				return rounded<R,false>(sign|(hi>>(i+1)), (hi>>i)&1, ((hi&((static_cast<uint32>(1)<<i)-1))|lo)!=0);
+			}
+			// non-zero but below the subnormal range: underflow per rounding mode
+			if((hi|lo) != 0)
+				return underflow<R>(sign);
+			// signed zero
+			return sign;
+		}
+
+		/// Convert non-IEEE floating-point to half-precision.
+		/// Portable fallback (selected when the source type is not IEEE 754 or
+		/// its bit width does not match): decomposes the value with frexp/ldexp
+		/// instead of touching the bit representation.
+		/// \tparam R rounding mode to use
+		/// \tparam T source type (builtin floating-point type)
+		/// \param value floating-point value to convert
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R,class T> unsigned int float2half_impl(T value, ...) {
+			unsigned int hbits = static_cast<unsigned>(builtin_signbit(value)) << 15;
+			if(value == T())
+				return hbits;
+			if(builtin_isnan(value))
+				return hbits | 0x7FFF;
+			if(builtin_isinf(value))
+				return hbits | 0x7C00;
+			int exp;
+			// frexp normalizes to [0.5,1), hence the offsets below differ by
+			// one from the usual IEEE biasing (16 instead of 15, 13 instead of 14)
+			std::frexp(value, &exp);
+			if(exp > 16)
+				return overflow<R>(hbits);
+			if(exp < -13)
+				// subnormal range: scale so the integer part carries the mantissa
+				value = std::ldexp(value, 25);
+			else {
+				value = std::ldexp(value, 12-exp);
+				hbits |= ((exp+13)<<10);
+			}
+			// integer part = 11-bit mantissa plus guard bit, fraction = sticky
+			T ival, frac = std::modf(value, &ival);
+			int m = std::abs(static_cast<int>(ival));
+			return rounded<R,false>(hbits+(m>>1), m&1, frac!=T());
+		}
+
+		/// Convert floating-point to half-precision.
+		/// Dispatches to the bit-level implementation when \a T is an IEEE 754
+		/// type of matching bit width, and to the generic frexp-based fallback
+		/// otherwise.
+		/// \tparam R rounding mode to use
+		/// \tparam T source type (builtin floating-point type)
+		/// \param value floating-point value to convert
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R,class T> unsigned int float2half(T value) {
+			return float2half_impl<R>(value, bool_type<std::numeric_limits<T>::is_iec559&&sizeof(bits_t<T>)==sizeof(T)>());
+		}
+		/// Convert floating-point to half-precision using the library's
+		/// default rounding mode (HALF_ROUND_STYLE).
+		/// \tparam T source type (builtin floating-point type)
+		/// \param value floating-point value to convert
+		/// \return rounded half-precision value
+		template<class T> unsigned int float2half(T value) {
+			return float2half_impl<(std::float_round_style)(HALF_ROUND_STYLE)>(value, bool_type<std::numeric_limits<T>::is_iec559&&sizeof(bits_t<T>)==sizeof(T)>());
+		}
+
+		/// Convert integer to half-precision floating-point.
+		/// \tparam R rounding mode to use
+		/// \tparam T type to convert (builtin integer type)
+		/// \param value integral value to convert
+		/// \return rounded half-precision value
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R,class T> unsigned int int2half(T value) {
+			unsigned int bits = static_cast<unsigned>(value<0) << 15;
+			if(!value)
+				return bits;
+			if(bits)
+				// NOTE(review): negating the minimum value of a signed T
+				// (e.g. INT_MIN) is undefined behavior — confirm callers never
+				// pass it, or widen before negating.
+				value = -value;
+			// anything >= 2^16 is out of half range regardless of rounding
+			if(value > 0xFFFF)
+				return overflow<R>(bits);
+			// normalize m into the 11-bit range [0x400,0x7FF]; exp starts at 24
+			// so that (exp<<10) plus m's leading bit yields the biased exponent
+			unsigned int m = static_cast<unsigned int>(value), exp = 24;
+			for(; m<0x400; m<<=1,--exp) ;
+			for(; m>0x7FF; m>>=1,++exp) ;
+			bits |= (exp<<10) + m;
+			// if bits were shifted out (exp>24), round using guard/sticky from
+			// the original value
+			return (exp>24) ? rounded<R,false>(bits, (value>>(exp-25))&1, (((1<<(exp-25))-1)&value)!=0) : bits;
+		}
+
+		/// Convert half-precision to IEEE single-precision.
+		/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
+		/// \param value half-precision value to convert
+		/// \return single-precision value
+		inline float half2float_impl(unsigned int value, float, true_type) {
+		#if HALF_ENABLE_F16C_INTRINSICS
+			return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value)));
+		#else
+		#if 0
+			bits_t<float> fbits = static_cast<bits_t<float>>(value&0x8000) << 16;
+			int abs = value & 0x7FFF;
+			if(abs)
+			{
+				fbits |= 0x38000000 << static_cast<unsigned>(abs>=0x7C00);
+				for(; abs<0x400; abs<<=1,fbits-=0x800000) ;
+				fbits += static_cast<bits_t<float>>(abs) << 13;
+			}
+		#else
+			static const bits_t<float> mantissa_table[2048] = {
+				0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, 
+				0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, 
+				0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, 
+				0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, 
+				0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, 
+				0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, 
+				0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 
+				0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, 
+				0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, 
+				0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, 
+				0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, 
+				0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, 
+				0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 
+				0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, 
+				0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, 
+				0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 
+				0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, 
+				0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, 
+				0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, 
+				0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, 
+				0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, 
+				0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 
+				0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, 
+				0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, 
+				0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 
+				0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, 
+				0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, 
+				0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, 
+				0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, 
+				0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, 
+				0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 
+				0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, 
+				0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, 
+				0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, 
+				0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, 
+				0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, 
+				0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, 
+				0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, 
+				0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, 
+				0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 
+				0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, 
+				0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, 
+				0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 
+				0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, 
+				0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, 
+				0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, 
+				0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, 
+				0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, 
+				0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 
+				0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, 
+				0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, 
+				0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 
+				0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, 
+				0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, 
+				0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, 
+				0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, 
+				0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, 
+				0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, 
+				0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, 
+				0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, 
+				0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, 
+				0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, 
+				0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, 
+				0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, 
+				0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, 
+				0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, 
+				0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 
+				0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, 
+				0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, 
+				0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 
+				0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, 
+				0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, 
+				0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, 
+				0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, 
+				0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, 
+				0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 
+				0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, 
+				0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, 
+				0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 
+				0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, 
+				0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, 
+				0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, 
+				0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, 
+				0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, 
+				0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 
+				0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, 
+				0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, 
+				0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 
+				0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, 
+				0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, 
+				0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, 
+				0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, 
+				0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, 
+				0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 
+				0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, 
+				0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, 
+				0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 
+				0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, 
+				0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, 
+				0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, 
+				0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, 
+				0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, 
+				0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 
+				0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, 
+				0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, 
+				0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 
+				0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, 
+				0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, 
+				0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, 
+				0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, 
+				0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, 
+				0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 
+				0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, 
+				0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, 
+				0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 
+				0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, 
+				0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, 
+				0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, 
+				0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, 
+				0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, 
+				0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 
+				0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, 
+				0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, 
+				0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 
+				0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, 
+				0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, 
+				0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, 
+				0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 };
+			static const bits_t<float> exponent_table[64] = {
+				0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, 
+				0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, 
+				0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, 
+				0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 };
+			static const unsigned short offset_table[64] = {
+				0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 
+				0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 };
+			bits_t<float> fbits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10];
+		#endif
+			float out;
+			std::memcpy(&out, &fbits, sizeof(float));
+			return out;
+		#endif
+		}
+
+		/// Convert half-precision to IEEE double-precision.
+		/// Without F16C this widens the half bit pattern directly: sign, exponent and
+		/// mantissa are assembled into the upper 32 bits of the double representation.
+		/// The lower 32 mantissa bits are always zero, since every half value is
+		/// exactly representable as a double.
+		/// \param value half-precision value to convert
+		/// \return double-precision value
+		inline double half2float_impl(unsigned int value, double, true_type) {
+		#if HALF_ENABLE_F16C_INTRINSICS
+			return _mm_cvtsd_f64(_mm_cvtps_pd(_mm_cvtph_ps(_mm_cvtsi32_si128(value))));
+		#else
+			uint32 hi = static_cast<uint32>(value&0x8000) << 16;	// move sign from bit 15 to bit 31 of the upper word
+			unsigned int abs = value & 0x7FFF;						// magnitude without sign; zero stays all-zero
+			if(abs) {
+				// Exponent rebias: 0x3F000000 (field value 1008) for finite values so that
+				// adding the half exponent below yields e+1008 = e-15+1023; doubled to
+				// 0x7E000000 for infinity/NaN (abs>=0x7C00) so the sum gives the all-ones
+				// double exponent 2047.
+				hi |= 0x3F000000 << static_cast<unsigned>(abs>=0x7C00);
+				// Normalize subnormal halves: shift the mantissa up until the implicit bit
+				// (0x400) is set, lowering the double exponent one step (0x100000) each time.
+				for(; abs<0x400; abs<<=1,hi-=0x100000) ;
+				hi += static_cast<uint32>(abs) << 10;				// place exponent + 10-bit mantissa into the 20-bit high mantissa field
+			}
+			bits_t<double> dbits = static_cast<bits_t<double>>(hi) << 32;
+			double out;
+			std::memcpy(&out, &dbits, sizeof(double));				// bit-exact type pun without aliasing UB
+			return out;
+		#endif
+		}
+
+		/// Convert half-precision to non-IEEE floating-point.
+		/// Portable fallback built on std::ldexp, used when the target type's bit
+		/// layout cannot be relied upon.
+		/// \tparam T type to convert to (builtin floating-point type)
+		/// \param value half-precision value to convert
+		/// \return floating-point value
+		template<class T> T half2float_impl(unsigned int value, T, ...) {
+			T out;
+			unsigned int abs = value & 0x7FFF;		// magnitude without sign
+			if(abs > 0x7C00)
+				// NaN: a clear mantissa MSB (bit 0x200) marks a signaling NaN;
+				// fall back to quiet NaN, then to T(), if the type lacks them
+				out = (std::numeric_limits<T>::has_signaling_NaN && !(abs&0x200)) ? std::numeric_limits<T>::signaling_NaN() :
+					std::numeric_limits<T>::has_quiet_NaN ? std::numeric_limits<T>::quiet_NaN() : T();
+			else if(abs == 0x7C00)
+				out = std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : std::numeric_limits<T>::max();
+			else if(abs > 0x3FF)
+				// normal: mantissa with implicit bit 0x400, exponent rebased by
+				// -15 (half bias) and -10 (mantissa width)
+				out = std::ldexp(static_cast<T>((abs&0x3FF)|0x400), (abs>>10)-25);
+			else
+				// subnormal: raw mantissa scaled by 2^-24 (= 2^(-14-10))
+				out = std::ldexp(static_cast<T>(abs), -24);
+			return (value&0x8000) ? -out : out;		// apply sign bit
+		}
+
+		/// Convert half-precision to floating-point.
+		/// Dispatches at compile time: the bit-level IEEE implementation is chosen
+		/// when \a T is an IEC 559 type whose size matches its bit representation,
+		/// the generic ldexp-based fallback otherwise.
+		/// \tparam T type to convert to (builtin floating-point type)
+		/// \param value half-precision value to convert
+		/// \return floating-point value
+		template<class T> T half2float(unsigned int value) {
+			typedef bool_type<std::numeric_limits<T>::is_iec559 && sizeof(bits_t<T>)==sizeof(T)> use_bitwise_path;
+			return half2float_impl(value, T(), use_bitwise_path());
+		}
+
+		/// Convert half-precision floating-point to integer.
+		/// \tparam R rounding mode to use
+		/// \tparam E `true` for round to even, `false` for round away from zero
+		/// \tparam I `true` to raise INEXACT exception (if inexact), `false` to never raise it
+		/// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
+		/// \param value half-precision value to convert
+		/// \return rounded integer value
+		/// \exception FE_INVALID if value is not representable in type \a T
+		/// \exception FE_INEXACT if value had to be rounded and \a I is `true`
+		template<std::float_round_style R,bool E,bool I,class T> T half2int(unsigned int value) {
+			unsigned int abs = value & 0x7FFF;
+			if(abs >= 0x7C00) {
+				// infinity or NaN: not representable, saturate to the type's range
+				raise(FE_INVALID);
+				return (value&0x8000) ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
+			}
+			if(abs < 0x3800) {
+				// |value| < 0.5 (0x3800 encodes 0.5): result is 0 except when a
+				// nonzero value is rounded toward the matching infinity (+/-1)
+				raise(FE_INEXACT, I);
+				return	(R==std::round_toward_infinity) ? T(~(value>>15)&(abs!=0)) :
+						(R==std::round_toward_neg_infinity) ? -T(value>0x8000) :
+						T();
+			}
+			int exp = 25 - (abs>>10);				// right shift needed to align the mantissa to the integer point (negative = left shift)
+			unsigned int m = (value&0x3FF) | 0x400;	// mantissa with implicit leading bit
+			// large values (exp<=0) shift left losslessly; otherwise add the
+			// mode-specific rounding term before shifting the fraction bits out
+			int32 i = static_cast<int32>((exp<=0) ? (m<<-exp) : ((m+(
+				(R==std::round_to_nearest) ? ((1<<(exp-1))-(~(m>>exp)&E)) :
+				(R==std::round_toward_infinity) ? (((1<<exp)-1)&((value>>15)-1)) :
+				(R==std::round_toward_neg_infinity) ? (((1<<exp)-1)&-(value>>15)) : 0))>>exp));
+			// range check: negative into unsigned, or overflow of targets narrower than 16 digits
+			if((!std::numeric_limits<T>::is_signed && (value&0x8000)) || (std::numeric_limits<T>::digits<16 &&
+				((value&0x8000) ? (-i<std::numeric_limits<T>::min()) : (i>std::numeric_limits<T>::max()))))
+				raise(FE_INVALID);
+			else if(I && exp > 0 && (m&((1<<exp)-1)))
+				raise(FE_INEXACT);					// fraction bits were discarded
+			return static_cast<T>((value&0x8000) ? -i : i);
+		}
+
+		/// \}
+		/// \name Mathematics
+		/// \{
+
+		/// upper part of 64-bit multiplication.
+		/// Builds the product from four 16x16-bit partial products so only 32-bit
+		/// arithmetic is needed; the carry word \a c both propagates into the high
+		/// half and supplies the rounding information for the discarded low half.
+		/// \tparam R rounding mode to use
+		/// \param x first factor
+		/// \param y second factor
+		/// \return upper 32 bit of \a x * \a y
+		template<std::float_round_style R> uint32 mulhi(uint32 x, uint32 y) {
+			// cross partial products, plus the carry out of the low 32 product bits
+			uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16);
+			return (x>>16)*(y>>16) + (xy>>16) + (yx>>16) + (c>>16) +
+				// nearest: bit 15 of c acts as the guard bit; toward infinity:
+				// round up when the inspected low bits of c are nonzero
+				((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0);
+		}
+
+		/// 64-bit multiplication.
+		/// \param x first factor
+		/// \param y second factor
+		/// \return upper 32 bit of \a x * \a y rounded to nearest
+		inline uint32 multiply64(uint32 x, uint32 y) {
+			// full 64-bit product, then round to nearest by adding half an ULP of
+			// the upper word before discarding the lower 32 bits
+			unsigned long long product = static_cast<unsigned long long>(x) * static_cast<unsigned long long>(y);
+			return static_cast<uint32>((product+0x80000000)>>32);
+		}
+
+		/// 64-bit division.
+		/// \param x upper 32 bit of dividend
+		/// \param y divisor
+		/// \param s variable to store sticky bit for rounding
+		/// \return (\a x << 32) / \a y
+		inline uint32 divide64(uint32 x, uint32 y, int &s) {
+			unsigned long long dividend = static_cast<unsigned long long>(x) << 32;
+			s = (dividend % y) != 0;	// sticky bit: set when the division is inexact
+			return static_cast<uint32>(dividend/y);
+		}
+
+		/// Half precision positive modulus.
+		/// Implements fmod/remainder/remquo-style reduction directly on the
+		/// half-precision bit patterns via binary long division of the mantissas.
+		/// \tparam Q `true` to compute full quotient, `false` else
+		/// \tparam R `true` to compute signed remainder, `false` for positive remainder
+		/// \param x first operand as positive finite half-precision value
+		/// \param y second operand as positive finite half-precision value
+		/// \param quo adress to store quotient at, `nullptr` if \a Q `false`
+		/// \return modulus of \a x / \a y
+		template<bool Q,bool R> unsigned int mod(unsigned int x, unsigned int y, int *quo = NULL) {
+			unsigned int q = 0;
+			if(x > y) {
+				// decompose both operands into unbiased exponent and normalized mantissa
+				int absx = x, absy = y, expx = 0, expy = 0;
+				for(; absx<0x400; absx<<=1,--expx) ;	// normalize subnormal x
+				for(; absy<0x400; absy<<=1,--expy) ;	// normalize subnormal y
+				expx += absx >> 10;
+				expy += absy >> 10;
+				int mx = (absx&0x3FF) | 0x400, my = (absx&0x3FF, absy&0x3FF) | 0x400;
+				// binary long division: one step per exponent difference,
+				// optionally collecting quotient bits when Q is set
+				for(int d=expx-expy; d; --d) {
+					if(!Q && mx == my)
+						return 0;						// x is an exact multiple of y
+					if(mx >= my) {
+						mx -= my;
+						q += Q;
+					}
+					mx <<= 1;
+					q <<= static_cast<int>(Q);
+				}
+				if(!Q && mx == my)
+					return 0;
+				if(mx >= my) {
+					mx -= my;
+					++q;
+				}
+				if(Q) {
+					q &= (1<<(std::numeric_limits<int>::digits-1)) - 1;	// keep the quotient non-negative
+					if(!mx)
+						return *quo = q, 0;
+				}
+				// renormalize the remainder and rebuild the half bit pattern
+				// (subnormal result when the exponent drops to zero or below)
+				for(; mx<0x400; mx<<=1,--expy) ;
+				x = (expy>0) ? ((expy<<10)|(mx&0x3FF)) : (mx>>(1-expy));
+			}
+			if(R) {
+				// signed remainder: fold x into [-y/2,y/2] by comparing 2*x against y
+				unsigned int a, b;
+				if(y < 0x800) {
+					a = (x<0x400) ? (x<<1) : (x+0x400);	// a = 2*x (shift subnormal mantissa, else bump exponent)
+					b = y;
+				} else {
+					a = x;
+					b = y - 0x400;						// b = y/2 (y is normal with exponent >= 2 here)
+				}
+				if(a > b || (a == b && (q&1))) {		// on a tie the quotient is rounded to even
+					// replace x by y-x and mark the result negative (sign bit 0x8000)
+					int exp = (y>>10) + (y<=0x3FF), d = exp - (x>>10) - (x<=0x3FF);
+					int m = (((y&0x3FF)|((y>0x3FF)<<10))<<1) - (((x&0x3FF)|((x>0x3FF)<<10))<<(1-d));
+					for(; m<0x800 && exp>1; m<<=1,--exp) ;
+					x = 0x8000 + ((exp-1)<<10) + (m>>1);
+					q += Q;
+				}
+			}
+			if(Q)
+				*quo = q;
+			return x;
+		}
+
+		/// Fixed point square root.
+		/// Classic binary digit-by-digit (restoring) square root: one result bit is
+		/// decided per iteration while consuming two radicand bits.
+		/// \tparam F number of fractional bits
+		/// \param r radicand in Q1.F fixed point format; receives the remainder on return
+		/// \param exp exponent; halved on return
+		/// \return square root as Q1.F/2
+		template<unsigned int F> uint32 sqrt(uint32 &r, int &exp) {
+			// make the exponent even (folding the odd bit into r) so it halves exactly
+			int i = exp & 1;
+			r <<= i;
+			exp = (exp-i) / 2;
+			uint32 m = 0;
+			for(uint32 bit=static_cast<uint32>(1)<<F; bit; bit>>=2) {
+				if(r < m+bit)
+					m >>= 1;				// bit rejected: just shift the partial root
+				else {
+					r -= m + bit;			// bit accepted: subtract and set it in the root
+					m = (m>>1) + bit;
+				}
+			}
+			return m;
+		}
+
+		/// Fixed point binary exponential.
+		/// This uses the BKM algorithm in E-mode.
+		/// \param m exponent in [0,1) as Q0.31
+		/// \param n number of iterations (at most 32)
+		/// \return 2 ^ \a m as Q1.31
+		inline uint32 exp2(uint32 m, unsigned int n = 32) {
+			// logs[i] = log2(1+2^-i) as Q0.31 (e.g. logs[1] ~ log2(1.5) ~ 0.585)
+			static const uint32 logs[] = {
+				0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B,
+				0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153,
+				0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171,
+				0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 };
+			if(!m)
+				return 0x80000000;		// 2^0 = 1.0 in Q1.31
+			uint32 mx = 0x80000000, my = 0;
+			for(unsigned int i=1; i<n; ++i) {
+				// greedily take log terms that keep the accumulated exponent my <= m;
+				// each accepted term multiplies the result mx by (1+2^-i)
+				uint32 mz = my + logs[i];
+				if(mz <= m) {
+					my = mz;
+					mx += mx >> i;
+				}
+			}
+			return mx;
+		}
+
+		/// Fixed point binary logarithm.
+		/// This uses the BKM algorithm in L-mode.
+		/// \param m mantissa in [1,2) as Q1.30
+		/// \param n number of iterations (at most 32)
+		/// \return log2(\a m) as Q0.31
+		inline uint32 log2(uint32 m, unsigned int n = 32) {
+			// logs[i] = log2(1+2^-i) as Q0.31 (same table as exp2)
+			static const uint32 logs[] = {
+				0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B,
+				0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153,
+				0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171,
+				0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 };
+			if(m == 0x40000000)
+				return 0;				// log2(1) = 0
+			uint32 mx = 0x40000000, my = 0;
+			for(unsigned int i=1; i<n; ++i) {
+				// grow the running product mx by factors (1+2^-i) while it stays
+				// below m, accumulating the matching log terms in my
+				uint32 mz = mx + (mx>>i);
+				if(mz <= m) {
+					mx = mz;
+					my += logs[i];
+				}
+			}
+			return my;
+		}
+
+		/// Fixed point sine and cosine.
+		/// This uses the CORDIC algorithm in rotation mode.
+		/// \param mz angle in [-pi/2,pi/2] as Q1.30
+		/// \param n number of iterations (at most 31)
+		/// \return sine and cosine of \a mz as Q1.30
+		inline std::pair<uint32,uint32> sincos(uint32 mz, unsigned int n = 31) {
+			// angles[i] = atan(2^-i) as Q1.30 (angles[0] ~ pi/4); from i=10 on
+			// atan(2^-i) is indistinguishable from 2^-i at this precision
+			static const uint32 angles[] = {
+				0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55,
+				0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
+				0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080,
+				0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 };
+			// start at the inverse CORDIC gain (~0.60725 in Q1.30) so the result
+			// needs no final scaling step
+			uint32 mx = 0x26DD3B6A, my = 0;
+			for(unsigned int i=0; i<n; ++i) {
+				// rotate toward a zero residual angle; (v^sign)+/-sign negates a
+				// term conditionally on the sign mask, branch-free
+				uint32 sign = sign_mask(mz);
+				uint32 tx = mx - (arithmetic_shift(my, i)^sign) + sign;
+				uint32 ty = my + (arithmetic_shift(mx, i)^sign) - sign;
+				mx = tx; my = ty; mz -= (angles[i]^sign) - sign;
+			}
+			return std::make_pair(my, mx);	// (sine, cosine)
+		}
+
+		/// Fixed point arc tangent.
+		/// This uses the CORDIC algorithm in vectoring mode.
+		/// \param my y coordinate as Q0.30
+		/// \param mx x coordinate as Q0.30
+		/// \param n number of iterations (at most 31)
+		/// \return arc tangent of \a my / \a mx as Q1.30
+		inline uint32 atan2(uint32 my, uint32 mx, unsigned int n = 31) {
+			// angles[i] = atan(2^-i) as Q1.30 (same table as sincos)
+			static const uint32 angles[] = {
+				0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55,
+				0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
+				0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080,
+				0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 };
+			uint32 mz = 0;
+			for(unsigned int i=0; i<n; ++i) {
+				// rotate the vector toward the x-axis (drive my to zero), summing
+				// the applied rotation angles in mz; (v^sign)+/-sign negates a
+				// term conditionally on the sign mask, branch-free
+				uint32 sign = sign_mask(my);
+				uint32 tx = mx + (arithmetic_shift(my, i)^sign) - sign;
+				uint32 ty = my - (arithmetic_shift(mx, i)^sign) + sign;
+				mx = tx; my = ty; mz += (angles[i]^sign) - sign;
+			}
+			return mz;
+		}
+
+		/// Reduce argument for trigonometric functions.
+		/// Payne-Hanek style reduction: multiply by 2/pi in extended precision,
+		/// split off the nearest quarter-period count and rescale the fraction.
+		/// \param abs half-precision floating-point value
+		/// \param k value to take quarter period
+		/// \return \a abs reduced to [-pi/4,pi/4] as Q0.30
+		inline uint32 angle_arg(unsigned int abs, int &k) {
+			uint32 m = (abs&0x3FF) | ((abs>0x3FF)<<10);	// mantissa with implicit bit (subnormals keep it clear)
+			int exp = (abs>>10) + (abs<=0x3FF) - 15;	// unbiased exponent
+			if(abs < 0x3A48)
+				return k = 0, m << (exp+20);			// below ~pi/4: no reduction needed
+			// y = m * (2/pi); 0xA2F9836E4E442 is 2/pi in fixed point. Round y to
+			// the nearest multiple of the quarter period (yi), keep the signed
+			// fractional rest f.
+			unsigned long long y = m * 0xA2F9836E4E442, mask = (1ULL<<(62-exp)) - 1, yi = (y+(mask>>1)) & ~mask, f = y - yi;
+			uint32 sign = -static_cast<uint32>(f>>63);	// all-ones mask if the rest is negative
+			k = static_cast<int>(yi>>(62-exp));
+			// scale |f| back by pi/2 (0xC90FDAA2 ~ pi/2 as Q1.31) and restore the sign
+			return (multiply64(static_cast<uint32>((sign ? -f : f)>>(31-exp)), 0xC90FDAA2)^sign) - sign;
+		}
+
+		/// Get arguments for atan2 function.
+		/// \param abs half-precision floating-point value
+		/// \return \a abs and sqrt(1 - \a abs^2) as Q0.30
+		inline std::pair<uint32,uint32> atan2_args(unsigned int abs) {
+			// Normalize subnormals and compute the unbiased exponent.
+			int exp = -15;
+			for(; abs<0x400; abs<<=1,--exp) ;
+			exp += abs >> 10;
+			// my = mantissa; r = 1 - abs^2 in Q2.30 (with sticky bit for the shifted-out part).
+			uint32 my = ((abs&0x3FF)|0x400) << 5, r = my * my;
+			int rexp = 2 * exp;
+			r = 0x40000000 - ((rexp>-31) ? ((r>>-rexp)|((r&((static_cast<uint32>(1)<<-rexp)-1))!=0)) : 1);
+			for(rexp=0; r<0x40000000; r<<=1,--rexp) ;
+			// mx = sqrt(r); the (r<<s)/mx terms below add a Newton-style correction to the root.
+			uint32 mx = sqrt<30>(r, rexp);
+			// Align both results to a common exponent, rounding the smaller one.
+			int d = exp - rexp;
+			if(d < 0)
+				return std::make_pair((d<-14) ? ((my>>(-d-14))+((my>>(-d-15))&1)) : (my<<(14+d)), (mx<<14)+(r<<13)/mx);
+			if(d > 0)
+				return std::make_pair(my<<14, (d>14) ? ((mx>>(d-14))+((mx>>(d-15))&1)) : ((d==14) ? mx : ((mx<<(14-d))+(r<<(13-d))/mx)));
+			return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx);
+		}
+
+		/// Get exponentials for hyperbolic computation.
+		/// \param abs half-precision floating-point value
+		/// \param exp variable to take unbiased exponent of larger result
+		/// \param n number of BKM iterations (at most 32)
+		/// \return exp(abs) and exp(-\a abs) as Q1.31 with same exponent
+		inline std::pair<uint32,uint32> hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32) {
+			// Convert abs to base-2: multiply mantissa by log2(e) (0xB8AA3B29 as Q1.31).
+			uint32 mx = detail::multiply64(static_cast<uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my;
+			int e = (abs>>10) + (abs<=0x3FF);
+			if(e < 14) {
+				// Small values: everything fits in the fractional part.
+				exp = 0;
+				mx >>= 14 - e;
+			} else {
+				// Split into integer exponent and Q0.31 fraction.
+				exp = mx >> (45-e);
+				mx = (mx<<(e-14)) & 0x7FFFFFFF;
+			}
+			// 2^fraction via the fixed-point exp2 helper.
+			mx = exp2(mx, n);
+			// exp(-abs) = 1/exp(abs); d is the exponent gap between the two results.
+			int d = exp << 1, s;
+			if(mx > 0x80000000) {
+				my = divide64(0x80000000, mx, s);
+				my |= s;
+				++d;
+			} else
+				my = mx;
+			// Shift exp(-abs) down to share the larger result's exponent (sticky-rounded).
+			return std::make_pair(mx, (d<31) ? ((my>>d)|((my&((static_cast<uint32>(1)<<d)-1))!=0)) : 1);
+		}
+
+		/// Postprocessing for binary exponential.
+		/// \tparam R rounding mode to use
+		/// \tparam I `true` to always raise INEXACT exception, `false` to raise only for rounded results
+		/// \param m mantissa as Q1.31
+		/// \param exp absolute value of unbiased exponent
+		/// \param esign sign of actual exponent
+		/// \param sign sign bit of result
+		/// \return value converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded or \a I is `true`
+		template<std::float_round_style R,bool I> unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0) {
+			int s = 0;
+			if(esign) {
+				// Negative exponent: take the reciprocal of the mantissa if it exceeds 1.
+				if(m > 0x80000000) {
+					m = divide64(0x80000000, m, s);
+					++exp;
+				}
+				// 2^-25 is below half's smallest subnormal; 2^-25 exactly rounds to it or 0.
+				if(exp > 25)
+					return underflow<R>(sign);
+				else if(exp == 25)
+					return rounded<R,I>(sign, 1, (m&0x7FFFFFFF)!=0);
+				exp = -exp;
+			} else if(exp > 15)
+				return overflow<R>(sign);
+			return fixed2half<R,31,false,false,I>(m, exp+14, sign, s);
+		}
+
+		/// Postprocessing for binary logarithm.
+		/// \tparam R rounding mode to use
+		/// \tparam L logarithm for base transformation as Q1.31
+		/// \param m fractional part of logarithm as Q0.31
+		/// \param ilog signed integer part of logarithm
+		/// \param exp biased exponent of result
+		/// \param sign sign bit of result
+		/// \return value base-transformed and converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if no other exception occurred
+		template<std::float_round_style R,uint32 L> unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0) {
+			// Combine integer and fractional parts; msign makes the result's magnitude
+			// positive while remembering the sign of the logarithm.
+			uint32 msign = sign_mask(ilog);
+			m = (((static_cast<uint32>(ilog)<<27)+(m>>4))^msign) - msign;
+			if(!m)
+				return 0;
+			// Normalize mantissa to [0x80000000, 0xFFFFFFFF].
+			for(; m<0x80000000; m<<=1,--exp) ;
+			int i = m >= L, s;
+			exp += i;
+			m >>= 1 + i;
+			// A negative logarithm flips the result's sign bit.
+			sign ^= msign & 0x8000;
+			if(exp < -11)
+				return underflow<R>(sign);
+			// Base transformation: divide by L (e.g. log2(e) or log2(10)).
+			m = divide64(m, L, s);
+			return fixed2half<R,30,false,false,true>(m, exp, sign, 1);
+		}
+
+		/// Hypotenuse square root and postprocessing.
+		/// \tparam R rounding mode to use
+		/// \param r mantissa as Q2.30
+		/// \param exp unbiased exponent
+		/// \return square root converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if value had to be rounded
+		template<std::float_round_style R> unsigned int hypot_post(uint32 r, int exp) {
+			// If the Q2.30 value carried into the second integer bit, renormalize
+			// (keeping a sticky bit in r&i for correct rounding).
+			int i = r >> 31;
+			if((exp+=i) > 46)
+				return overflow<R>();
+			if(exp < -34)
+				return underflow<R>();
+			r = (r>>i) | (r&i);
+			// sqrt halves the exponent; r!=0 afterwards acts as the sticky/guard flag.
+			uint32 m = sqrt<30>(r, exp+=15);
+			return fixed2half<R,15,false,false,false>(m, exp-1, 0, r!=0);
+		}
+
+		/// Division and postprocessing for tangents.
+		/// \tparam R rounding mode to use
+		/// \param my dividend as Q1.31
+		/// \param mx divisor as Q1.31
+		/// \param exp biased exponent of result
+		/// \param sign sign bit of result
+		/// \return quotient converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if no other exception occurred
+		template<std::float_round_style R> unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0) {
+			// Pre-shift the dividend if the quotient would be >= 1 and bump the exponent.
+			int i = my >= mx, s;
+			exp += i;
+			if(exp > 29)
+				return overflow<R>(sign);
+			if(exp < -11)
+				return underflow<R>(sign);
+			// s receives the sticky bit of the division for correct rounding.
+			uint32 m = divide64(my>>(i+1), mx, s);
+			return fixed2half<R,30,false,false,true>(m, exp, sign, s);
+		}
+
+		/// Area function and postprocessing.
+		/// This computes the value directly in Q2.30 using the representation `asinh|acosh(x) = log(x+sqrt(x^2+|-1))`.
+		/// \tparam R rounding mode to use
+		/// \tparam S `true` for asinh, `false` for acosh
+		/// \param arg half-precision argument
+		/// \return asinh|acosh(\a arg) converted to half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if no other exception occurred
+		template<std::float_round_style R,bool S> unsigned int area(unsigned int arg) {
+			int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i;
+			uint32 mx = static_cast<uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << 20, my, r;
+			// Normalize subnormals and compute x^2 (r) with its exponent expy.
+			for(; abs<0x400; abs<<=1,--expy) ;
+			expy += abs >> 10;
+			r = ((abs&0x3FF)|0x400) << 5;
+			r *= r;
+			i = r >> 31;
+			expy = 2*expy + i;
+			r >>= i;
+			if(S) {
+				// asinh: r = x^2 + 1, aligned to a common exponent (sticky-rounded).
+				if(expy < 0) {
+					r = 0x40000000 + ((expy>-30) ? ((r>>-expy)|((r&((static_cast<uint32>(1)<<-expy)-1))!=0)) : 1);
+					expy = 0;
+				} else {
+					r += 0x40000000 >> expy;
+					i = r >> 31;
+					r = (r>>i) | (r&i);
+					expy += i;
+				}
+			} else {
+				// acosh: r = x^2 - 1, renormalized.
+				r -= 0x40000000 >> expy;
+				for(; r<0x40000000; r<<=1,--expy) ;
+			}
+			// my = sqrt(r) with a correction term; then align x and the root to a
+			// common exponent before summing for the log argument.
+			my = sqrt<30>(r, expy);
+			my = (my<<15) + (r<<14)/my;
+			if(S) {
+				mx >>= expy - expx;
+				ilog = expy;
+			} else {
+				my >>= expx - expy;
+				ilog = expx;
+			}
+			my += mx;
+			i = my >> 31;
+			// G: extra guard handling for asinh under round-to-nearest.
+			static const int G = S && (R==std::round_to_nearest);
+			return log2_post<R,0xB8AA3B2A>(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast<unsigned>(S)<<15));
+		}
+
+		/// Class for 1.31 unsigned floating-point computation.
+		/// A minimal normalized (mantissa in [1,2)) unsigned floating-point helper used by
+		/// the erf/gamma approximations below.
+		struct f31 {
+			/// Constructor.
+			/// \param mant mantissa as 1.31
+			/// \param e exponent
+			constexpr f31(uint32 mant, int e) : m(mant), exp(e) {}
+
+			/// Constructor.
+			/// \param abs unsigned half-precision value
+			f31(unsigned int abs) : exp(-15) {
+				// Normalize subnormals, then widen the 11-bit mantissa to Q1.31.
+				for(; abs<0x400; abs<<=1,--exp) ;
+				m = static_cast<uint32>((abs&0x3FF)|0x400) << 21;
+				exp += (abs>>10);
+			}
+
+			/// Addition operator.
+			/// \param a first operand
+			/// \param b second operand
+			/// \return \a a + \a b
+			friend f31 operator+(f31 a, f31 b) {
+				// Ensure a has the larger exponent, align b's mantissa, then add.
+				if(b.exp > a.exp)
+					std::swap(a, b);
+				int d = a.exp - b.exp;
+				uint32 m = a.m + ((d<32) ? (b.m>>d) : 0);
+				// Carry detection; the 0xFFFFFFFF mask guards against uint32 being a
+				// wider-than-32-bit typedef (it is only guaranteed to be *at least* 32 bits).
+				int i = (m&0xFFFFFFFF) < a.m;
+				return f31(((m+i)>>i)|0x80000000, a.exp+i);
+			}
+
+			/// Subtraction operator.
+			/// \param a first operand
+			/// \param b second operand
+			/// \return \a a - \a b
+			friend f31 operator-(f31 a, f31 b) {
+				// Assumes a >= b (no borrow handling) — callers keep the operands ordered.
+				int d = a.exp - b.exp, exp = a.exp;
+				uint32 m = a.m - ((d<32) ? (b.m>>d) : 0);
+				if(!m)
+					return f31(0, -32);
+				// Renormalize so the leading bit is set.
+				for(; m<0x80000000; m<<=1,--exp) ;
+				return f31(m, exp);
+			}
+
+			/// Multiplication operator.
+			/// \param a first operand
+			/// \param b second operand
+			/// \return \a a * \a b
+			friend f31 operator*(f31 a, f31 b) {
+				uint32 m = multiply64(a.m, b.m);
+				// Product of [1,2) mantissas is in [1,4): renormalize by at most one bit.
+				int i = m >> 31;
+				return f31(m<<(1-i), a.exp + b.exp + i);
+			}
+
+			/// Division operator.
+			/// \param a first operand
+			/// \param b second operand
+			/// \return \a a / \a b
+			friend f31 operator/(f31 a, f31 b) {
+				// Pre-shift the dividend when the quotient would reach 2.
+				int i = a.m >= b.m, s;
+				uint32 m = divide64((a.m+i)>>i, b.m, s);
+				return f31(m, a.exp - b.exp + i - 1);
+			}
+
+			uint32 m;			///< mantissa as 1.31.
+			int exp;			///< exponent.
+		};
+
+		/// Error function and postprocessing.
+		/// This computes the value directly in Q1.31 using the approximations given 
+		/// [here](https://en.wikipedia.org/wiki/Error_function#Approximation_with_elementary_functions).
+		/// \tparam R rounding mode to use
+		/// \tparam C `true` for complementary error function, `false` else
+		/// \param arg half-precision function argument
+		/// \return approximated value of error function in half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if no other exception occurred
+		template<std::float_round_style R,bool C> unsigned int erf(unsigned int arg) {
+			unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
+			// x2 = x^2 * log2(e) (0xB8AA3B29); t is the rational substitution variable of
+			// the Abramowitz/Stegun-style approximation, e the polynomial in t divided by e^(x^2).
+			f31 x(abs), x2 = x * x * f31(0xB8AA3B29, 0), t = f31(0x80000000, 0) / (f31(0x80000000, 0)+f31(0xA7BA054A, -2)*x), t2 = t * t;
+			f31 e = ((f31(0x87DC2213, 0)*t2+f31(0xB5F0E2AE, 0))*t2+f31(0x82790637, -2)-(f31(0xBA00E2B8, 0)*t2+f31(0x91A98E62, -2))*t) * t /
+					((x2.exp<0) ? f31(exp2((x2.exp>-32) ? (x2.m>>-x2.exp) : 0, 30), 0) : f31(exp2((x2.m<<x2.exp)&0x7FFFFFFF, 22), x2.m>>(31-x2.exp)));
+			// erf(x) = 1 - e, erfc(-x) = 1 + e (first branch); erfc(x>0) = e (second branch).
+			return (!C || sign) ? fixed2half<R,31,false,true,true>(0x80000000-(e.m>>(C-e.exp)), 14+C, sign&(C-1U)) :
+					(e.exp<-25) ? underflow<R>() : fixed2half<R,30,false,false,true>(e.m>>1, e.exp+14, 0, e.m&1);
+		}
+
+		/// Gamma function and postprocessing.
+		/// This approximates the value of either the gamma function or its logarithm directly in Q1.31.
+		/// \tparam R rounding mode to use
+		/// \tparam L `true` for logarithm of gamma function, `false` for gamma function
+		/// \param arg half-precision floating-point value
+		/// \return lgamma/tgamma(\a arg) in half-precision
+		/// \exception FE_OVERFLOW on overflows
+		/// \exception FE_UNDERFLOW on underflows
+		/// \exception FE_INEXACT if \a arg is not a positive integer
+		template<std::float_round_style R,bool L> unsigned int gamma(unsigned int arg) {
+// Double-precision reference implementation of the Lanczos-style approximation below:
+/*			static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 };
+			double t = arg + 4.65, s = p[0];
+			for(unsigned int i=0; i<5; ++i)
+				s += p[i+1] / (arg+i);
+			return std::log(s) + (arg-0.5)*std::log(t) - t;
+*/			static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);
+			unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
+			bool bsign = sign != 0;
+			// For negative arguments compute lgamma(1-x) first (reflection applied later).
+			// s accumulates the rational series; t = x + 4.65 (as in the reference above).
+			f31 z(abs), x = sign ? (z+f31(0x80000000, 0)) : z, t = x + f31(0x94CCCCCD, 2), s =
+				f31(0xA06C9901, 1) + f31(0xBBE654E2, -7)/(x+f31(0x80000000, 2)) + f31(0xA1CE6098, 6)/(x+f31(0x80000000, 1))
+				+ f31(0xE1868CB7, 7)/x - f31(0x8625E279, 8)/(x+f31(0x80000000, 0)) - f31(0xA03E158F, 2)/(x+f31(0xC0000000, 1));
+			// log(s): combine integer exponent and fixed-point log2 of the mantissa, base e via lbe.
+			int i = (s.exp>=2) + (s.exp>=4) + (s.exp>=8) + (s.exp>=16);
+			s = f31((static_cast<uint32>(s.exp)<<(31-i))+(log2(s.m>>1, 28)>>i), i) / lbe;
+			if(x.exp != -1 || x.m != 0x80000000) {
+				// Add (x - 0.5) * log(t) unless x == 0.5 (where the term vanishes).
+				i = (t.exp>=2) + (t.exp>=4) + (t.exp>=8);
+				f31 l = f31((static_cast<uint32>(t.exp)<<(31-i))+(log2(t.m>>1, 30)>>i), i) / lbe;
+				s = (x.exp<-1) ? (s-(f31(0x80000000, -1)-x)*l) : (s+(x-f31(0x80000000, -1))*l);
+			}
+			s = x.exp ? (s-t) : (t-s);
+			if(bsign) {
+				// Reflection formula needs sin(pi*z): reduce z modulo 1, exploit symmetry
+				// around 0.5, then evaluate via the fixed-point CORDIC sincos.
+				if(z.exp >= 0) {
+					sign &= (L|((z.m>>(31-z.exp))&1)) - 1;
+					for(z=f31((z.m<<(1+z.exp))&0xFFFFFFFF, -1); z.m<0x80000000; z.m<<=1,--z.exp) ;
+				}
+				if(z.exp == -1)
+					z = f31(0x80000000, 0) - z;
+				if(z.exp < -1) {
+					z = z * pi;
+					z.m = sincos(z.m>>(1-z.exp), 30).first;
+					for(z.exp=1; z.m<0x80000000; z.m<<=1,--z.exp) ;
+				}
+				else
+					z = f31(0x80000000, 0);
+			} if(L) {
+				// lgamma: apply log of the reflection formula for negative arguments.
+				if(bsign) {
+					f31 l(0x92868247, 0);
+					if(z.exp < 0) {
+						uint32 m = log2((z.m+1)>>1, 27);
+						z = f31(-((static_cast<uint32>(z.exp)<<26)+(m>>5)), 5);
+						for(; z.m<0x80000000; z.m<<=1,--z.exp) ;
+						l = l + z / lbe;
+					}
+					sign = static_cast<unsigned>(x.exp&&(l.exp<s.exp||(l.exp==s.exp&&l.m<s.m))) << 15;
+					s = sign ? (s-l) : x.exp ? (l-s) : (l+s);
+				} else {
+					sign = static_cast<unsigned>(x.exp==0) << 15;
+					if(s.exp < -24)
+						return underflow<R>(sign);
+					if(s.exp > 15)
+						return overflow<R>(sign);
+				}
+			} else {
+				// tgamma: exponentiate the computed log-gamma.
+				s = s * lbe;
+				uint32 m;
+				if(s.exp < 0) {
+					m = s.m >> -s.exp;
+					s.exp = 0;
+				} else {
+					m = (s.m<<s.exp) & 0x7FFFFFFF;
+					s.exp = (s.m>>(31-s.exp));
+				}
+				s.m = exp2(m, 27);
+				if(!x.exp)
+					s = f31(0x80000000, 0) / s;
+				if(bsign) {
+					// Reflection: tgamma(-x) = pi / (sin(pi*x) * tgamma(1+x)).
+					if(z.exp < 0)
+						s = s * z;
+					s = pi / s;
+					if(s.exp < -24)
+						return underflow<R>(sign);
+				} else if(z.exp > 0 && !(z.m&((1<<(31-z.exp))-1)))
+					// Exact positive integer argument: result is exact, no rounding path.
+					return ((s.exp+14)<<10) + (s.m>>21);
+				if(s.exp > 15)
+					return overflow<R>(sign);
+			}
+			return fixed2half<R,31,false,false,true>(s.m, s.exp+14, sign);
+		}
+		/// \}
+
+		template<class,class,std::float_round_style> struct half_caster;
+	}
+
+	/// Half-precision floating-point type.
+	/// This class implements an IEEE-conformant half-precision floating-point type with the usual arithmetic 
+	/// operators and conversions. It is implicitly convertible to single-precision floating-point, which causes arithmetic 
+	/// expressions and functions with mixed-type operands to be of the most precise operand type.
+	///
+	/// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and 
+	/// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which 
+	/// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the 
+	/// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not necessarily have to be of 
+	/// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most 
+	/// probably not involve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit 
+	/// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if 
+	/// your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the case on 
+	/// nearly any reasonable platform.
+	///
+	/// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable 
+	/// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation.
+	class half {
+	public:
+		/// \name Construction and assignment
+		/// \{
+
+		/// Default constructor.
+		/// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics 
+		/// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics.
+		constexpr half() noexcept : data_() {}
+
+		/// Conversion constructor.
+		/// \param rhs float to convert
+		/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+		//explicit half(float rhs) : data_(static_cast<detail::uint16>(detail::float2half<round_style>(rhs))) {}
+
+		/// Conversion constructor.
+		/// Intentionally non-explicit: any arithmetic type converts implicitly through float.
+		/// \param rhs float to convert
+		/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+		template<class T>
+		half(T rhs) : data_(static_cast<detail::uint16>(detail::float2half<round_style>(static_cast<float>(rhs)))) {}
+
+		/// Conversion to single-precision.
+		/// \return single precision value representing expression value
+		operator float() const { return detail::half2float<float>(data_); }
+
+		/// Assignment operator.
+		/// \param rhs single-precision value to copy from
+		/// \return reference to this half
+		/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+		half& operator=(const float &rhs) { data_ = static_cast<detail::uint16>(detail::float2half<round_style>(rhs)); return *this; }
+
+		/// Generic assignment: routes any arithmetic type through the float overload.
+		template<class T>
+		half& operator=(const T &rhs) { return *this = static_cast<float>(rhs); }
+
+		/// \}
+		/// \name Arithmetic updates
+		/// \{
+
+		/// Arithmetic assignment.
+		/// \tparam T type of concrete half expression
+		/// \param rhs half expression to add
+		/// \return reference to this half
+		/// \exception FE_... according to operator+(half,half)
+		half& operator+=(half rhs) { return *this = *this + rhs; }
+
+		/// Arithmetic assignment.
+		/// \tparam T type of concrete half expression
+		/// \param rhs half expression to subtract
+		/// \return reference to this half
+		/// \exception FE_... according to operator-(half,half)
+		half& operator-=(half rhs) { return *this = *this - rhs; }
+
+		/// Arithmetic assignment.
+		/// \tparam T type of concrete half expression
+		/// \param rhs half expression to multiply with
+		/// \return reference to this half
+		/// \exception FE_... according to operator*(half,half)
+		half& operator*=(half rhs) { return *this = *this * rhs; }
+
+		/// Arithmetic assignment.
+		/// \tparam T type of concrete half expression
+		/// \param rhs half expression to divide by
+		/// \return reference to this half
+		/// \exception FE_... according to operator/(half,half)
+		half& operator/=(half rhs) { return *this = *this / rhs; }
+
+		// Float-taking compound assignments are disabled; the implicit template
+		// constructor above makes them redundant (floats convert to half first).
+		/*
+		/// Arithmetic assignment.
+		/// \param rhs single-precision value to add
+		/// \return reference to this half
+		/// \exception FE_... according to operator=()
+		half& operator+=(float rhs) { return *this = *this + rhs; }
+
+		/// Arithmetic assignment.
+		/// \param rhs single-precision value to subtract
+		/// \return reference to this half
+		/// \exception FE_... according to operator=()
+		half& operator-=(float rhs) { return *this = *this - rhs; }
+
+		/// Arithmetic assignment.
+		/// \param rhs single-precision value to multiply with
+		/// \return reference to this half
+		/// \exception FE_... according to operator=()
+		half& operator*=(float rhs) { return *this = *this * rhs; }
+
+		/// Arithmetic assignment.
+		/// \param rhs single-precision value to divide by
+		/// \return reference to this half
+		/// \exception FE_... according to operator=()
+		half& operator/=(float rhs) { return *this = *this / rhs; }
+		*/
+
+		/// \}
+		/// \name Increment and decrement
+		/// \{
+
+		/// Prefix increment.
+		/// Implemented as addition of one (0x3C00 is the bit pattern of +1.0).
+		/// \return incremented half value
+		/// \exception FE_... according to operator+(half,half)
+		half& operator++() { return *this = *this + half(detail::binary, 0x3C00); }
+
+		/// Prefix decrement.
+		/// Implemented as addition of minus one (0xBC00 is the bit pattern of -1.0).
+		/// \return decremented half value
+		/// \exception FE_... according to operator-(half,half)
+		half& operator--() { return *this = *this + half(detail::binary, 0xBC00); }
+
+		/// Postfix increment.
+		/// \return non-incremented half value
+		/// \exception FE_... according to operator+(half,half)
+		half operator++(int) { half out(*this); ++*this; return out; }
+
+		/// Postfix decrement.
+		/// \return non-decremented half value
+		/// \exception FE_... according to operator-(half,half)
+		half operator--(int) { half out(*this); --*this; return out; }
+		/// \}
+
+		/// Raw access to the underlying IEEE 754 binary16 bit pattern.
+		/// \return the 16-bit binary representation
+		detail::uint16 get_data()const{ return data_; }
+	
+	private:
+		/// Rounding mode to use
+		static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE);
+
+		/// Constructor.
+		/// \param bits binary representation to set half to
+		constexpr half(detail::binary_t, unsigned int bits) noexcept : data_(static_cast<detail::uint16>(bits)) {}
+
+		/// Internal binary representation
+		detail::uint16 data_;
+
+		// All operators and <cmath>-style functions operate on the raw bit pattern
+		// directly and therefore need friend access.
+		friend constexpr_NOERR bool operator==(half, half);
+		template<class T> friend constexpr_NOERR bool operator==(half, T);
+		template<class T> friend constexpr_NOERR bool operator==(T, half);
+		friend constexpr_NOERR bool operator!=(half, half);
+		template<class T> friend constexpr_NOERR bool operator!=(half, T);
+		template<class T> friend constexpr_NOERR bool operator!=(T, half);
+		friend constexpr_NOERR bool operator<(half, half);
+		template<class T> friend constexpr_NOERR bool operator<(half, T);
+		template<class T> friend constexpr_NOERR bool operator<(T, half);
+		friend constexpr_NOERR bool operator>(half, half);
+		template<class T> friend constexpr_NOERR bool operator>(half, T);
+		template<class T> friend constexpr_NOERR bool operator>(T, half);
+		friend constexpr_NOERR bool operator<=(half, half);
+		template<class T> friend constexpr_NOERR bool operator<=(half, T);
+		template<class T> friend constexpr_NOERR bool operator<=(T, half);
+		friend constexpr_NOERR bool operator>=(half, half);
+		template<class T> friend constexpr_NOERR bool operator>=(half, T);
+		template<class T> friend constexpr_NOERR bool operator>=(T, half);
+		friend constexpr half operator+(half);
+		friend constexpr half operator-(half);
+		friend half operator+(half, half);
+		template<class T> friend half operator+(half, T);
+		template<class T> friend half operator+(T, half);
+		friend half operator-(half, half);
+		template<class T> friend half operator-(half, T);
+		template<class T> friend half operator-(T, half);
+		friend half operator*(half, half);
+		template<class T> friend half operator*(half, T);
+		template<class T> friend half operator*(T, half);
+		friend half operator/(half, half);
+		template<class T> friend half operator/(half, T);
+		template<class T> friend half operator/(T, half);
+		template<class charT,class traits> friend std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits>&, half);
+		template<class charT,class traits> friend std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits>&, half&);
+		friend constexpr half fabs(half);
+		friend half fmod(half, half);
+		friend half remainder(half, half);
+		friend half remquo(half, half, int*);
+		friend half fma(half, half, half);
+		friend constexpr_NOERR half fmax(half, half);
+		friend constexpr_NOERR half fmin(half, half);
+		friend half fdim(half, half);
+		friend half nanh(const char*);
+		friend half exp(half);
+		friend half exp2(half);
+		friend half expm1(half);
+		friend half log(half);
+		friend half log10(half);
+		friend half log2(half);
+		friend half log1p(half);
+		friend half sqrt(half);
+		friend half cbrt(half);
+		friend half hypot(half, half);
+		friend half hypot(half, half, half);
+		friend half pow(half, half);
+		friend void sincos(half, half*, half*);
+		friend half sin(half);
+		friend half cos(half);
+		friend half tan(half);
+		friend half asin(half);
+		friend half acos(half);
+		friend half atan(half);
+		friend half atan2(half, half);
+		friend half sinh(half);
+		friend half cosh(half);
+		friend half tanh(half);
+		friend half asinh(half);
+		friend half acosh(half);
+		friend half atanh(half);
+		friend half erf(half);
+		friend half erfc(half);
+		friend half lgamma(half);
+		friend half tgamma(half);
+		friend half ceil(half);
+		friend half floor(half);
+		friend half trunc(half);
+		friend half round(half);
+		friend long lround(half);
+		friend half rint(half);
+		friend long lrint(half);
+		friend half nearbyint(half);
+		friend long long llround(half);
+		friend long long llrint(half);
+		friend half frexp(half, int*);
+		friend half scalbln(half, long);
+		friend half modf(half, half*);
+		friend int ilogb(half);
+		friend half logb(half);
+		friend half nextafter(half, half);
+		friend half nexttoward(half, long double);
+		friend constexpr half copysign(half, half);
+		friend constexpr int fpclassify(half);
+		friend constexpr bool isfinite(half);
+		friend constexpr bool isinf(half);
+		friend constexpr bool isnan(half);
+		friend constexpr bool isnormal(half);
+		friend constexpr bool signbit(half);
+		friend constexpr bool isgreater(half, half);
+		friend constexpr bool isgreaterequal(half, half);
+		friend constexpr bool isless(half, half);
+		friend constexpr bool islessequal(half, half);
+		friend constexpr bool islessgreater(half, half);
+		template<class,class,std::float_round_style> friend struct detail::half_caster;
+		friend class std::numeric_limits<half>;
+		friend struct std::hash<half>;
+		friend half literal::operator "" _h(long double);
+	};
+
+	namespace literal {
+		/// Half literal.
+		/// Produces a properly rounded half-precision value from a floating-point literal. The involved conversion 
+		/// cannot be a constant expression, so this is a convenience feature rather than a compile-time optimization — 
+		/// the rounding happens at runtime.
+		/// \param value literal value
+		/// \return half of the given value (possibly rounded)
+		/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+		inline half operator "" _h(long double value) {
+			const unsigned int bits = detail::float2half<half::round_style>(value);
+			return half(detail::binary, bits);
+		}
+	}
+
+	namespace detail {
+		/// Helper class for half casts.
+		/// This class template has to be specialized for all valid cast arguments to define an appropriate static 
+		/// `cast` member function and a corresponding `type` member denoting its return type.
+		/// \tparam T destination type
+		/// \tparam U source type
+		/// \tparam R rounding mode to use
+		template<class T,class U,std::float_round_style R=(std::float_round_style)(HALF_ROUND_STYLE)> struct half_caster {};
+
+		/// Cast to half: rounds from a built-in floating-point or integer type.
+		template<class U,std::float_round_style R> struct half_caster<half,U,R> {
+			static_assert(std::is_arithmetic<U>::value, "half_cast from non-arithmetic type unsupported");
+			// NOTE: removed a stray semicolon after this definition (ill-formed pre-C++11,
+			// warned about under -pedantic).
+			static half cast(U arg) { return cast_impl(arg, is_float<U>()); }
+		private:
+			// Tag dispatch on whether U is a floating-point type.
+			static half cast_impl(U arg, true_type) { return half(binary, float2half<R>(arg)); }
+			static half cast_impl(U arg, false_type) { return half(binary, int2half<R>(arg)); }
+		};
+
+		/// Cast from half: converts to a built-in floating-point or integer type (rounding for integers).
+		template<class T,std::float_round_style R> struct half_caster<T,half,R> {
+			static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
+			static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
+		private:
+			static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }
+			static T cast_impl(half arg, false_type) { return half2int<R,true,true,T>(arg.data_); }
+		};
+
+		/// Identity cast half -> half (no rounding involved).
+		template<std::float_round_style R> struct half_caster<half,half,R> {
+			static half cast(half arg) { return arg; }
+		};
+	}
+}
+
+/// Extensions to the C++ standard library.
+namespace std {
+	/// Numeric limits for half-precision floats.
+	/// **See also:** Documentation for [std::numeric_limits](https://en.cppreference.com/w/cpp/types/numeric_limits)
+	template<> class numeric_limits<half_float::half> {
+	public:
+		/// Is template specialization.
+		static constexpr bool is_specialized = true;
+
+		/// Supports signed values.
+		static constexpr bool is_signed = true;
+
+		/// Is not an integer type.
+		static constexpr bool is_integer = false;
+
+		/// Is not exact.
+		static constexpr bool is_exact = false;
+
+		/// Doesn't provide modulo arithmetic.
+		static constexpr bool is_modulo = false;
+
+		/// Has a finite set of values.
+		static constexpr bool is_bounded = true;
+
+		/// IEEE conformant.
+		static constexpr bool is_iec559 = true;
+
+		/// Supports infinity.
+		static constexpr bool has_infinity = true;
+
+		/// Supports quiet NaNs.
+		static constexpr bool has_quiet_NaN = true;
+
+		/// Supports signaling NaNs.
+		static constexpr bool has_signaling_NaN = true;
+
+		/// Supports subnormal values.
+		static constexpr float_denorm_style has_denorm = denorm_present;
+
+		/// Supports no denormalization detection.
+		static constexpr bool has_denorm_loss = false;
+
+	#if HALF_ERRHANDLING_THROWS
+		static constexpr bool traps = true;
+	#else
+		/// Traps only if [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID) is activated.
+		static constexpr bool traps = false;
+	#endif
+
+		/// Does not support pre-rounding underflow detection.
+		static constexpr bool tinyness_before = false;
+
+		/// Rounding mode.
+		static constexpr float_round_style round_style = half_float::half::round_style;
+
+		/// Significant digits.
+		static constexpr int digits = 11;
+
+		/// Significant decimal digits.
+		static constexpr int digits10 = 3;
+
+		/// Required decimal digits to represent all possible values.
+		static constexpr int max_digits10 = 5;
+
+		/// Number base.
+		static constexpr int radix = 2;
+
+		/// One more than smallest exponent.
+		static constexpr int min_exponent = -13;
+
+		/// Smallest normalized representable power of 10.
+		static constexpr int min_exponent10 = -4;
+
+		/// One more than largest exponent.
+		static constexpr int max_exponent = 16;
+
+		/// Largest finitely representable power of 10.
+		static constexpr int max_exponent10 = 4;
+
+		/// Smallest positive normal value (2^-14).
+		static constexpr half_float::half min() noexcept { return half_float::half(half_float::detail::binary, 0x0400); }
+
+		/// Smallest finite value (-65504).
+		static constexpr half_float::half lowest() noexcept { return half_float::half(half_float::detail::binary, 0xFBFF); }
+
+		/// Largest finite value (65504).
+		static constexpr half_float::half max() noexcept { return half_float::half(half_float::detail::binary, 0x7BFF); }
+
+		/// Difference between 1 and next representable value (2^-10).
+		static constexpr half_float::half epsilon() noexcept { return half_float::half(half_float::detail::binary, 0x1400); }
+
+		/// Maximum rounding error in ULP (units in the last place): 0.5 for round-to-nearest, 1.0 otherwise.
+		static constexpr half_float::half round_error() noexcept
+			{ return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); }
+
+		/// Positive infinity.
+		static constexpr half_float::half infinity() noexcept { return half_float::half(half_float::detail::binary, 0x7C00); }
+
+		/// Quiet NaN.
+		static constexpr half_float::half quiet_NaN() noexcept { return half_float::half(half_float::detail::binary, 0x7FFF); }
+
+		/// Signaling NaN.
+		static constexpr half_float::half signaling_NaN() noexcept { return half_float::half(half_float::detail::binary, 0x7DFF); }
+
+		/// Smallest positive subnormal value (2^-24).
+		static constexpr half_float::half denorm_min() noexcept { return half_float::half(half_float::detail::binary, 0x0001); }
+	};
+
+	/// Hash function for half-precision floats.
+	/// **See also:** Documentation for [std::hash](https://en.cppreference.com/w/cpp/utility/hash)
+	template<> struct hash<half_float::half> {
+		/// Type of function argument.
+		typedef half_float::half argument_type;
+
+		/// Function return type.
+		typedef size_t result_type;
+
+		/// Compute hash function.
+		/// The mask zeroes the bit pattern only when it equals 0x8000 (negative zero),
+		/// so that -0.0 and +0.0 — which compare equal — also hash equally.
+		/// \param arg half to hash
+		/// \return hash value
+		result_type operator()(argument_type arg) const { return hash<half_float::detail::uint16>()(arg.data_&-static_cast<unsigned>(arg.data_!=0x8000)); }
+	};
+}
+
+namespace half_float {
+	/// \anchor compop
+	/// \name Comparison operators
+	/// \{
+
+	/// Comparison for equality.
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if operands equal
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline constexpr_NOERR bool operator==(half x, half y) {
+		// detail::compsignal() screens NaN operands (any comparison with NaN is false).
+		// Otherwise equality means bit-identical values, except that +0 and -0
+		// (payload bits all zero, only the sign differs) also compare equal.
+		return !detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF));
+	}
+	// Heterogeneous overloads: the non-half operand is converted to half first (may round).
+	template<class T>
+	inline constexpr_NOERR bool operator==(half x, T y) { return x == static_cast<half>(y); }
+	template<class T>
+	inline constexpr_NOERR bool operator==(T x, half y) { return static_cast<half>(x) == y; }
+
+	/// Comparison for inequality.
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if operands not equal
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline constexpr_NOERR bool operator!=(half x, half y) {
+		// Exact logical inverse of operator==: NaN operands compare unequal to everything;
+		// otherwise unequal means different bits AND not the +0/-0 pair.
+		return detail::compsignal(x.data_, y.data_) || (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF));
+	}
+	// Heterogeneous overloads: the non-half operand is converted to half first (may round).
+	template<class T>
+	inline constexpr_NOERR bool operator!=(half x, T y) { return x != static_cast<half>(y); }
+	template<class T>
+	inline constexpr_NOERR bool operator!=(T x, half y) { return static_cast<half>(x) != y; }
+
+	/// Comparison for less than.
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x less than \a y
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline constexpr_NOERR bool operator<(half x, half y) {
+		// (v^(0x8000|(0x8000-(v>>15))))+(v>>15) remaps the sign-magnitude half encoding to
+		// an unsigned key with the correct total order: positive values get the sign bit
+		// set (v^0x8000), negative values are bitwise-inverted and incremented (~v+1).
+		return !detail::compsignal(x.data_, y.data_) &&
+			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+	}
+	// Heterogeneous overloads: the non-half operand is converted to half first (may round).
+	template<class T>
+	inline constexpr_NOERR bool operator<(half x, T y) { return x < static_cast<half>(y); }
+	template<class T>
+	inline constexpr_NOERR bool operator<(T x, half y) { return static_cast<half>(x) < y; }
+
+	/// Comparison for greater than.
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x greater than \a y
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline constexpr_NOERR bool operator>(half x, half y) {
+		// Same ordered-key remapping as operator< (see there); NaN operands compare false.
+		return !detail::compsignal(x.data_, y.data_) &&
+			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+	}
+	// Heterogeneous overloads: the non-half operand is converted to half first (may round).
+	template<class T>
+	inline constexpr_NOERR bool operator>(half x, T y) { return x > static_cast<half>(y); }
+	template<class T>
+	inline constexpr_NOERR bool operator>(T x, half y) { return static_cast<half>(x) > y; }
+
+	/// Comparison for less equal.
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x less equal \a y
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline constexpr_NOERR bool operator<=(half x, half y) {
+		// Same ordered-key remapping as operator< (see there); NaN operands compare false.
+		return !detail::compsignal(x.data_, y.data_) &&
+			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+	}
+	// Heterogeneous overloads: the non-half operand is converted to half first (may round).
+	template<class T>
+	inline constexpr_NOERR bool operator<=(half x, T y) { return x <= static_cast<half>(y); }
+	template<class T>
+	inline constexpr_NOERR bool operator<=(T x, half y) { return static_cast<half>(x) <= y; }
+
+	/// Comparison for greater equal.
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x greater equal \a y
+	/// \retval false else
+	/// \exception FE_INVALID if \a x or \a y is NaN
+	inline constexpr_NOERR bool operator>=(half x, half y) {
+		// Same ordered-key remapping as operator< (see there); NaN operands compare false.
+		return !detail::compsignal(x.data_, y.data_) &&
+			((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15));
+	}
+	// Heterogeneous overloads: the non-half operand is converted to half first (may round).
+	template<class T>
+	inline constexpr_NOERR bool operator>=(half x, T y) { return x >= static_cast<half>(y); }
+	template<class T>
+	inline constexpr_NOERR bool operator>=(T x, half y) { return static_cast<half>(x) >= y; }
+
+	/// \}
+	/// \anchor arithmetics
+	/// \name Arithmetic operators
+	/// \{
+
+	/// Identity.
+	/// \param arg operand
+	/// \return unchanged operand
+	inline constexpr half operator+(half arg) { return arg; }
+
+	/// Negation.
+	/// Operates on the raw bits (flips only the sign bit), so it also negates
+	/// infinities and NaNs without raising any floating-point exception.
+	/// \param arg operand
+	/// \return negated operand
+	inline constexpr half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); }
+
+	/// Addition.
+	/// This operation is exact to rounding for all rounding modes.
+	/// \param x left operand
+	/// \param y right operand
+	/// \return sum of half expressions
+	/// \exception FE_INVALID if \a x and \a y are infinities with different signs or signaling NaNs
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator+(half x, half y) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: compute in the wider arithmetic type and round once to half.
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)+detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		// Software path: work on sign-stripped magnitudes; `sub` is true when the operands
+		// have opposite signs, i.e. the magnitudes must effectively be subtracted.
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF;
+		bool sub = ((x.data_^y.data_)&0x8000) != 0;
+		// Special values: biased exponent field all ones (>= 0x7C00) means inf or NaN.
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : (absy!=0x7C00) ? x.data_ :
+										(sub && absx==0x7C00) ? detail::invalid() : y.data_);
+		// x == +-0: result is y, except 0+0 where the sign depends on the rounding mode.
+		if(!absx)
+			return absy ? y : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (x.data_|y.data_) : (x.data_&y.data_));
+		if(!absy)
+			return x;
+		// The result sign comes from the operand with the larger magnitude; then make
+		// absx the larger magnitude so only one alignment direction is needed.
+		unsigned int sign = ((sub && absy>absx) ? y.data_ : x.data_) & 0x8000;
+		if(absy > absx)
+			std::swap(absx, absy);
+		// Extract mantissas with 3 guard bits; d is the exponent difference. When y is
+		// shifted entirely out of range (d >= 13) it collapses to a single sticky bit.
+		int exp = (absx>>10) + (absx<=0x3FF), d = exp - (absy>>10) - (absy<=0x3FF), mx = ((absx&0x3FF)|((absx>0x3FF)<<10)) << 3, my;
+		if(d < 13) {
+			my = ((absy&0x3FF)|((absy>0x3FF)<<10)) << 3;
+			my = (my>>d) | ((my&((1<<d)-1))!=0);	// keep lost bits as a sticky OR
+		} else
+			my = 1;
+		if(sub) {
+			// Exact cancellation: +0, or -0 when rounding toward negative infinity.
+			if(!(mx-=my))
+				return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
+			for(; mx<0x2000 && exp>1; mx<<=1,--exp) ;	// renormalize after cancellation
+		} else {
+			mx += my;
+			int i = mx >> 14;	// 1 if the mantissa sum carried out
+			if((exp+=i) > 30)
+				return half(detail::binary, detail::overflow<half::round_style>(sign));
+			mx = (mx>>i) | (mx&i);	// shift the carry back, folding the lost bit into sticky
+		}
+		// Round: guard bit is (mx>>2)&1, sticky is the remaining low bits.
+		return half(detail::binary, detail::rounded<half::round_style,false>(sign+((exp-1)<<10)+(mx>>3), (mx>>2)&1, (mx&0x3)!=0));
+	#endif
+	}
+	// Heterogeneous overloads: the non-half operand is converted to half first (may round).
+	template<class T>
+	inline half operator+(half x, T y) { return x + static_cast<half>(y); }
+	template<class T>
+	inline half operator+(T x, half y) { return static_cast<half>(x) + y; }
+
+	/// Subtraction.
+	/// This operation is exact to rounding for all rounding modes.
+	/// \param x left operand
+	/// \param y right operand
+	/// \return difference of half expressions
+	/// \exception FE_INVALID if \a x and \a y are infinities with equal signs or signaling NaNs
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator-(half x, half y) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)-detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		// Delegate to addition: unary minus only flips y's sign bit, so x + (-y)
+		// is bit-exact and inherits operator+'s rounding and exception behavior.
+		return x + (-y);
+	#endif
+	}
+	// Heterogeneous overloads: the non-half operand is converted to half first (may round).
+	template<class T>
+	inline half operator-(half x, T y) { return x - static_cast<half>(y); }
+	template<class T>
+	inline half operator-(T x, half y) { return static_cast<half>(x) - y; }
+
+	/// Multiplication.
+	/// This operation is exact to rounding for all rounding modes.
+	/// \param x left operand
+	/// \param y right operand
+	/// \return product of half expressions
+	/// \exception FE_INVALID if multiplying 0 with infinity or if \a x or \a y is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator*(half x, half y) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: compute in the wider arithmetic type and round once to half.
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)*detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -16;
+		// Product sign is the XOR of the operand signs.
+		unsigned int sign = (x.data_^y.data_) & 0x8000;
+		// Special values: NaN propagates/signals; 0*inf is invalid; inf otherwise.
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+										((absx==0x7C00 && !absy)||(absy==0x7C00 && !absx)) ? detail::invalid() : (sign|0x7C00));
+		if(!absx || !absy)
+			return half(detail::binary, sign);
+		// Normalize subnormal inputs, tracking the exponent adjustment in `exp`.
+		for(; absx<0x400; absx<<=1,--exp) ;
+		for(; absy<0x400; absy<<=1,--exp) ;
+		// 11-bit x 11-bit mantissa product fits in 22 bits of a uint32.
+		detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
+		int i = m >> 21, s = m & i;	// i: carry into bit 21; s: bit lost when shifting it out (sticky)
+		exp += (absx>>10) + (absy>>10) + i;
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		else if(exp < -11)
+			return half(detail::binary, detail::underflow<half::round_style>(sign));
+		return half(detail::binary, detail::fixed2half<half::round_style,20,false,false,false>(m>>i, exp, sign, s));
+	#endif
+	}
+	// Heterogeneous overloads: the non-half operand is converted to half first (may round).
+	template<class T>
+	inline half operator*(half x, T y) { return x * static_cast<half>(y); }
+	template<class T>
+	inline half operator*(T x, half y) { return static_cast<half>(x) * y; }
+
+	/// Division.
+	/// This operation is exact to rounding for all rounding modes.
+	/// \param x left operand
+	/// \param y right operand
+	/// \return quotient of half expressions
+	/// \exception FE_INVALID if dividing 0s or infinities with each other or if \a x or \a y is signaling NaN
+	/// \exception FE_DIVBYZERO if dividing finite value by 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half operator/(half x, half y) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: compute in the wider arithmetic type and round once to half.
+		return half(detail::binary, detail::float2half<half::round_style>(detail::half2float<detail::internal_t>(x.data_)/detail::half2float<detail::internal_t>(y.data_)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = 14;
+		// Quotient sign is the XOR of the operand signs.
+		unsigned int sign = (x.data_^y.data_) & 0x8000;
+		// Special values: NaN signals; inf/inf is invalid (absx==absy here implies both inf).
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+										(absx==absy) ? detail::invalid() : (sign|((absx==0x7C00) ? 0x7C00 : 0)));
+		// 0/0 is invalid; 0/y is a signed zero; x/0 is a signed infinity with FE_DIVBYZERO.
+		if(!absx)
+			return half(detail::binary, absy ? sign : detail::invalid());
+		if(!absy)
+			return half(detail::binary, detail::pole(sign));
+		// Normalize subnormal inputs, tracking the exponent adjustment in `exp`.
+		for(; absx<0x400; absx<<=1,--exp) ;
+		for(; absy<0x400; absy<<=1,++exp) ;
+		detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
+		int i = mx < my;	// pre-shift one extra bit when the quotient would be < 1
+		exp += (absx>>10) - (absy>>10) - i;
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		else if(exp < -11)
+			return half(detail::binary, detail::underflow<half::round_style>(sign));
+		// Fixed-point long division; the remainder feeds the sticky bit for rounding.
+		mx <<= 12 + i;
+		my <<= 1;
+		return half(detail::binary, detail::fixed2half<half::round_style,11,false,false,false>(mx/my, exp, sign, mx%my!=0));
+	#endif
+	}
+	// Heterogeneous overloads: the non-half operand is converted to half first (may round).
+	template<class T>
+	inline half operator/(half x, T y) { return x / static_cast<half>(y); }
+	template<class T>
+	inline half operator/(T x, half y) { return static_cast<half>(x) / y; }
+
+	/// \}
+	/// \anchor streaming
+	/// \name Input and output
+	/// \{
+
+	/// Output operator.
+	///	This uses the built-in functionality for streaming out floating-point numbers.
+	/// \param out output stream to write into
+	/// \param arg half expression to write
+	/// \return reference to output stream
+	template<class charT,class traits> std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits> &out, half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		return out << detail::half2float<detail::internal_t>(arg.data_);
+	#else
+		// Widen to float (exact for every half value) and let the stream format it.
+		return out << detail::half2float<float>(arg.data_);
+	#endif
+	}
+
+	/// Input operator.
+	///	This uses the built-in functionality for streaming in floating-point numbers, specifically double precision floating 
+	/// point numbers (unless overridden with [HALF_ARITHMETIC_TYPE](\ref HALF_ARITHMETIC_TYPE)). So the input string is first 
+	/// rounded to double precision using the underlying platform's current floating-point rounding mode before being rounded 
+	/// to half-precision using the library's half-precision rounding mode.
+	/// \param in input stream to read from
+	/// \param arg half to read into
+	/// \return reference to input stream
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	template<class charT,class traits> std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits> &in, half &arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		detail::internal_t f;
+	#else
+		double f;
+	#endif
+		// Parse at the wider precision first, then round once to half.
+		// On stream failure, arg is left untouched (only the stream state changes).
+		if(in >> f)
+			arg.data_ = detail::float2half<half::round_style>(f);
+		return in;
+	}
+
+	/// \}
+	/// \anchor basic
+	/// \name Basic mathematical operations
+	/// \{
+
+	/// Absolute value.
+	/// Operates on the raw bits (clears only the sign bit), so it never raises a
+	/// floating-point exception, even for signaling NaN.
+	/// **See also:** Documentation for [std::fabs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
+	/// \param arg operand
+	/// \return absolute value of \a arg
+	inline constexpr half fabs(half arg) { return half(detail::binary, arg.data_&0x7FFF); }
+
+	/// Absolute value.
+	/// **See also:** Documentation for [std::abs](https://en.cppreference.com/w/cpp/numeric/math/fabs).
+	/// \param arg operand
+	/// \return absolute value of \a arg
+	inline constexpr half abs(half arg) { return fabs(arg); }
+
+	/// Remainder of division.
+	/// **See also:** Documentation for [std::fmod](https://en.cppreference.com/w/cpp/numeric/math/fmod).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return remainder of floating-point division.
+	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	inline half fmod(half x, half y) {
+		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
+		// Special values: NaN signals; fmod(inf, y) is invalid; fmod(x, inf) returns x.
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+										(absx==0x7C00) ? detail::invalid() : x.data_);
+		if(!absy)
+			return half(detail::binary, detail::invalid());
+		if(!absx)
+			return x;
+		// Equal magnitudes divide exactly: result is a zero carrying x's sign.
+		if(absx == absy)
+			return half(detail::binary, sign);
+		// Truncated (fmod-style) modulo on the magnitudes; the result keeps x's sign.
+		return half(detail::binary, sign|detail::mod<false,false>(absx, absy));
+	}
+
+	/// Remainder of division.
+	/// **See also:** Documentation for [std::remainder](https://en.cppreference.com/w/cpp/numeric/math/remainder).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return remainder of floating-point division.
+	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	inline half remainder(half x, half y) {
+		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
+		// Special values: NaN signals; remainder(inf, y) is invalid; remainder(x, inf) returns x.
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+										(absx==0x7C00) ? detail::invalid() : x.data_);
+		if(!absy)
+			return half(detail::binary, detail::invalid());
+		if(absx == absy)
+			return half(detail::binary, sign);
+		// Round-to-nearest remainder via detail::mod<false,true>; the XOR (rather than OR)
+		// lets the helper flip the sign when the nearest multiple lies beyond x.
+		return half(detail::binary, sign^detail::mod<false,true>(absx, absy));
+	}
+
+	/// Remainder of division.
+	/// **See also:** Documentation for [std::remquo](https://en.cppreference.com/w/cpp/numeric/math/remquo).
+	/// \param x first operand
+	/// \param y second operand
+	/// \param quo address to store some bits of quotient at
+	/// \return remainder of floating-point division.
+	/// \exception FE_INVALID if \a x is infinite or \a y is 0 or if \a x or \a y is signaling NaN
+	inline half remquo(half x, half y, int *quo) {
+		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, value = x.data_ & 0x8000;
+		// Special values: NaN signals; remquo(inf, y) is invalid; remquo(x, inf) is x with *quo = 0.
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+										(absx==0x7C00) ? detail::invalid() : (*quo = 0, x.data_));
+		if(!absy)
+			return half(detail::binary, detail::invalid());
+		// The sign of the reported quotient is the XOR of the operand signs.
+		bool qsign = ((value^y.data_)&0x8000) != 0;
+		int q = 1;
+		// detail::mod<true,true> computes the rounded remainder and fills q with the
+		// low quotient bits; |x| == |y| divides exactly with quotient 1.
+		if(absx != absy)
+			value ^= detail::mod<true, true>(absx, absy, &q);
+		return *quo = qsign ? -q : q, half(detail::binary, value);
+	}
+
+	/// Fused multiply add.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::fma](https://en.cppreference.com/w/cpp/numeric/math/fma).
+	/// \param x first operand
+	/// \param y second operand
+	/// \param z third operand
+	/// \return ( \a x * \a y ) + \a z rounded as one operation.
+	/// \exception FE_INVALID according to operator*() and operator+() unless any argument is a quiet NaN and no argument is a signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding the final addition
+	inline half fma(half x, half y, half z) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_), fz = detail::half2float<detail::internal_t>(z.data_);
+		// NOTE(review): upstream half.hpp guards this with HALF_ENABLE_CPP11_CMATH as well;
+		// confirm FP_FAST_FMA expands to a numeric value on all target toolchains so
+		// this #if does not break when the macro is defined empty.
+		#if FP_FAST_FMA
+			return half(detail::binary, detail::float2half<half::round_style>(std::fma(fx, fy, fz)));
+		#else
+			return half(detail::binary, detail::float2half<half::round_style>(fx*fy+fz));
+		#endif
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, exp = -15;
+		// Product sign; `sub` is true when the addition of z is effectively a subtraction.
+		unsigned int sign = (x.data_^y.data_) & 0x8000;
+		bool sub = ((sign^z.data_)&0x8000) != 0;
+		// Special values: NaN signals; 0*inf and inf-inf are invalid; otherwise inf or z wins.
+		if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
+			return	(absx>0x7C00 || absy>0x7C00 || absz>0x7C00) ? half(detail::binary, detail::signal(x.data_, y.data_, z.data_)) :
+					(absx==0x7C00) ? half(detail::binary, (!absy || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) :
+					(absy==0x7C00) ? half(detail::binary, (!absx || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : z;
+		// Zero product: result is z, except 0+0 where the sign depends on rounding mode.
+		if(!absx || !absy)
+			return absz ? z : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (z.data_|sign) : (z.data_&sign));
+		// Normalize subnormal factors, tracking the exponent adjustment.
+		for(; absx<0x400; absx<<=1,--exp) ;
+		for(; absy<0x400; absy<<=1,--exp) ;
+		// Exact 22-bit product of the two 11-bit mantissas; then add z at full precision.
+		detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
+		int i = m >> 21;
+		exp += (absx>>10) + (absy>>10) + i;
+		m <<= 3 - i;
+		if(absz) {
+			int expz = 0;
+			for(; absz<0x400; absz<<=1,--expz) ;
+			expz += absz >> 10;
+			detail::uint32 mz = static_cast<detail::uint32>((absz&0x3FF)|0x400) << 13;
+			// Keep the larger-magnitude addend in (m, exp); it determines the result sign.
+			if(expz > exp || (expz == exp && mz > m)) {
+				std::swap(m, mz);
+				std::swap(exp, expz);
+				if(sub)
+					sign = z.data_ & 0x8000;
+			}
+			// Align the smaller addend, collapsing lost bits into a sticky bit.
+			int d = exp - expz;
+			mz = (d<23) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+			if(sub) {
+				m = m - mz;
+				// Exact cancellation: +0, or -0 when rounding toward negative infinity.
+				if(!m)
+					return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
+				for(; m<0x800000; m<<=1,--exp) ;	// renormalize after cancellation
+			} else {
+				m += mz;
+				i = m >> 24;	// carry out of the mantissa?
+				m = (m>>i) | (m&i);
+				exp += i;
+			}
+		}
+		if(exp > 30)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		else if(exp < -10)
+			return half(detail::binary, detail::underflow<half::round_style>(sign));
+		return half(detail::binary, detail::fixed2half<half::round_style,23,false,false,false>(m, exp-1, sign));
+	#endif
+	}
+
+	/// Maximum of half expressions.
+	/// **See also:** Documentation for [std::fmax](https://en.cppreference.com/w/cpp/numeric/math/fmax).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return maximum of operands, ignoring quiet NaNs
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	inline constexpr_NOERR half fmax(half x, half y) {
+		// Uses the same ordered-key remapping as operator< on the raw bits. When exactly
+		// one operand is NaN the other is returned (detail::select handles NaN payloads).
+		return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) < 
+			(y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
+	}
+
+	/// Minimum of half expressions.
+	/// **See also:** Documentation for [std::fmin](https://en.cppreference.com/w/cpp/numeric/math/fmin).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return minimum of operands, ignoring quiet NaNs
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	inline constexpr_NOERR half fmin(half x, half y) {
+		// Mirror image of fmax: same ordered-key comparison with > instead of <.
+		return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) >
+			(y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
+	}
+
+	/// Positive difference.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::fdim](https://en.cppreference.com/w/cpp/numeric/math/fdim).
+	/// \param x first operand
+	/// \param y second operand
+	/// \return \a x - \a y or 0 if difference negative
+	/// \exception FE_... according to operator-(half,half)
+	inline half fdim(half x, half y) {
+		if(isnan(x) || isnan(y))
+			return half(detail::binary, detail::signal(x.data_, y.data_));
+		// Ordered-key comparison (see operator<): return +0 when x <= y, else the
+		// rounded difference computed by operator-.
+		return (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <= (y.data_^(0x8000|(0x8000-(y.data_>>15)))) ? half(detail::binary, 0) : (x-y);
+	}
+
+	/// Get NaN value.
+	/// **See also:** Documentation for [std::nan](https://en.cppreference.com/w/cpp/numeric/math/nan).
+	/// \param arg string code
+	/// \return quiet NaN
+	inline half nanh(const char *arg) {
+		// Start from the all-ones quiet NaN (0x7FFF) and XOR-fold the tag string's
+		// bytes into the payload, so different tags tend to yield different NaNs.
+		unsigned int value = 0x7FFF;
+		while(*arg)
+			value ^= static_cast<unsigned>(*arg++) & 0xFF;
+		return half(detail::binary, value);
+	}
+
+	/// \}
+	/// \anchor exponential
+	/// \name Exponential functions
+	/// \{
+
+	/// Exponential function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::exp](https://en.cppreference.com/w/cpp/numeric/math/exp).
+	/// \param arg function argument
+	/// \return e raised to \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half exp(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::exp(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF;
+		// exp(+-0) == 1 exactly (0x3C00).
+		if(!abs)
+			return half(detail::binary, 0x3C00);
+		// exp(+inf) = +inf, exp(-inf) = +0 (the mask zeroes 0x7C00 for negative sign);
+		// NaN signals and propagates.
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
+		// |arg| >= 18 (0x4C80): the result is already outside the half range either way.
+		if(abs >= 0x4C80)
+			return half(detail::binary, (arg.data_&0x8000) ? detail::underflow<half::round_style>() : detail::overflow<half::round_style>());
+		// Base conversion: compute |arg| * log2(e); 0xB8AA3B29 is log2(e) in fixed point.
+		detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
+		int e = (abs>>10) + (abs<=0x3FF), exp;
+		// Split into integer part (exp) and fraction (m), so the result is 2^exp * 2^m.
+		if(e < 14) {
+			exp = 0;
+			m >>= 14 - e;
+		} else {
+			exp = m >> (45-e);
+			m = (m<<(e-14)) & 0x7FFFFFFF;
+		}
+		// exp2_post applies the sign (reciprocal for negative arguments) and rounds.
+		return half(detail::binary, detail::exp2_post<half::round_style,true>(detail::exp2(m, 26), exp, (arg.data_&0x8000)!=0));
+	#endif
+	}
+
+	/// Binary exponential.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::exp2](https://en.cppreference.com/w/cpp/numeric/math/exp2).
+	/// \param arg function argument
+	/// \return 2 raised to \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half exp2(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		return half(detail::binary, detail::float2half<half::round_style>(std::exp2(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF;
+		// exp2(+-0) == 1 exactly (0x3C00).
+		if(!abs)
+			return half(detail::binary, 0x3C00);
+		// exp2(+inf) = +inf, exp2(-inf) = +0; NaN signals and propagates.
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
+		// |arg| >= 25 (0x4E40): the result is already outside the half range either way.
+		if(abs >= 0x4E40)
+			return half(detail::binary, (arg.data_&0x8000) ? detail::underflow<half::round_style>() : detail::overflow<half::round_style>());
+		// Split |arg| into integer part (exp) and fraction (driving detail::exp2).
+		int e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10);
+		detail::uint32 m = detail::exp2((static_cast<detail::uint32>(exp)<<(6+e))&0x7FFFFFFF, 28);
+		exp >>= 25 - e;
+		// m == 0x80000000 means the fraction is exactly 0, i.e. arg is an integer:
+		// the result 2^(+-exp) is exact and handled without the rounding helper.
+		if(m == 0x80000000) {
+			if(arg.data_&0x8000)
+				exp = -exp;
+			else if(exp > 15)
+				return half(detail::binary, detail::overflow<half::round_style>());
+			return half(detail::binary, detail::fixed2half<half::round_style,31,false,false,false>(m, exp+14));
+		}
+		return half(detail::binary, detail::exp2_post<half::round_style,true>(m, exp, (arg.data_&0x8000)!=0));
+	#endif
+	}
+
+	/// Exponential minus one.
+	/// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest` 
+	/// and in <1% of inputs for any other rounding mode.
+	///
+	/// **See also:** Documentation for [std::expm1](https://en.cppreference.com/w/cpp/numeric/math/expm1).
+	/// \param arg function argument
+	/// \return e raised to \a arg and subtracted by 1
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half expm1(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		return half(detail::binary, detail::float2half<half::round_style>(std::expm1(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+		// expm1(+-0) == +-0 exactly.
+		if(!abs)
+			return arg;
+		// expm1(+inf) = +inf, expm1(-inf) = -1; NaN signals and propagates.
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? (0x7C00+(sign>>1)) : detail::signal(arg.data_));
+		// |arg| >= 12 (0x4A00): positive arguments overflow, negative ones round to -1
+		// (0xBBFF with guard and sticky set rounds toward -1 per the rounding mode).
+		if(abs >= 0x4A00)
+			return half(detail::binary, (arg.data_&0x8000) ? detail::rounded<half::round_style,true>(0xBBFF, 1, 1) : detail::overflow<half::round_style>());
+		// Base conversion as in exp(): |arg| * log2(e) (0xB8AA3B29 = log2(e), fixed point),
+		// split into integer part (exp) and fraction (m).
+		detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
+		int e = (abs>>10) + (abs<=0x3FF), exp;
+		if(e < 14) {
+			exp = 0;
+			m >>= 14 - e;
+		} else {
+			exp = m >> (45-e);
+			m = (m<<(e-14)) & 0x7FFFFFFF;
+		}
+		m = detail::exp2(m);
+		// Subtract 1: for negative arguments compute 1 - 2^-t via a fixed-point
+		// reciprocal; for positive ones subtract the aligned representation of 1.
+		if(sign) {
+			int s = 0;
+			if(m > 0x80000000) {
+				++exp;
+				m = detail::divide64(0x80000000, m, s);
+			}
+			m = 0x80000000 - ((m>>exp)|((m&((static_cast<detail::uint32>(1)<<exp)-1))!=0)|s);
+			exp = 0;
+		} else
+			m -= (exp<31) ? (0x80000000>>exp) : 1;
+		// Renormalize the difference and round to half.
+		for(exp+=14; m<0x80000000 && exp; m<<=1,--exp) ;
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>());
+		return half(detail::binary, detail::rounded<half::round_style,true>(sign+(exp<<10)+(m>>21), (m>>20)&1, (m&0xFFFFF)!=0));
+	#endif
+	}
+
+	/// Natural logarithm.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::log](https://en.cppreference.com/w/cpp/numeric/math/log).
+	/// \param arg function argument
+	/// \return logarithm of \a arg to base e
+	/// \exception FE_INVALID for signaling NaN or negative argument
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::log(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = -15;
+		// log(+-0) = -inf with FE_DIVBYZERO; negative arguments are invalid;
+		// log(+inf) = +inf; NaN signals and propagates.
+		if(!abs)
+			return half(detail::binary, detail::pole(0x8000));
+		if(arg.data_ & 0x8000)
+			return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+		if(abs >= 0x7C00)
+			return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+		// Normalize subnormals, then compute log2 of the mantissa and let log2_post
+		// combine it with the exponent and rescale from base 2 to base e.
+		for(; abs<0x400; abs<<=1,--exp) ;
+		exp += abs >> 10;
+		return half(detail::binary, detail::log2_post<half::round_style,0xB8AA3B2A>(
+			detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 17));
+	#endif
+	}
+
+	/// Common logarithm.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::log10](https://en.cppreference.com/w/cpp/numeric/math/log10).
+	/// \param arg function argument
+	/// \return logarithm of \a arg to base 10
+	/// \exception FE_INVALID for signaling NaN or negative argument
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log10(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		return half(detail::binary, detail::float2half<half::round_style>(std::log10(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = -15;
+		// Same special-value handling as log(): -inf pole at 0, invalid for negatives.
+		if(!abs)
+			return half(detail::binary, detail::pole(0x8000));
+		if(arg.data_ & 0x8000)
+			return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+		if(abs >= 0x7C00)
+			return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+		// Exact results for the representable powers of ten: 10, 100, 1000, 10000.
+		switch(abs) {
+			case 0x4900: return half(detail::binary, 0x3C00);
+			case 0x5640: return half(detail::binary, 0x4000);
+			case 0x63D0: return half(detail::binary, 0x4200);
+			case 0x70E2: return half(detail::binary, 0x4400);
+		}
+		// Normalize, take log2 of the mantissa, then rescale from base 2 to base 10
+		// via log2_post's fixed-point conversion constant.
+		for(; abs<0x400; abs<<=1,--exp) ;
+		exp += abs >> 10;
+		return half(detail::binary, detail::log2_post<half::round_style,0xD49A784C>(
+			detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 16));
+	#endif
+	}
+
+	/// Binary logarithm.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::log2](https://en.cppreference.com/w/cpp/numeric/math/log2).
+	/// \param arg function argument
+	/// \return logarithm of \a arg to base 2
+	/// \exception FE_INVALID for signaling NaN or negative argument
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log2(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		return half(detail::binary, detail::float2half<half::round_style>(std::log2(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = -15, s = 0;
+		// Same special-value handling as log(): -inf pole at 0, invalid for negatives.
+		if(!abs)
+			return half(detail::binary, detail::pole(0x8000));
+		if(arg.data_ & 0x8000)
+			return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+		if(abs >= 0x7C00)
+			return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+		// log2(1) == +0 exactly.
+		if(abs == 0x3C00)
+			return half(detail::binary, 0);
+		for(; abs<0x400; abs<<=1,--exp) ;
+		exp += (abs>>10);
+		// Exact powers of two (mantissa bits all zero): the result is just the signed
+		// integer exponent, converted to half without any polynomial evaluation.
+		if(!(abs&0x3FF)) {
+			unsigned int value = static_cast<unsigned>(exp<0) << 15, m = std::abs(exp) << 6;
+			for(exp=18; m<0x400; m<<=1,--exp) ;
+			return half(detail::binary, value+(exp<<10)+m);
+		}
+		// General case: combine the integer exponent with the fractional log2 of the
+		// mantissa; sign_mask turns a negative total into sign-magnitude form.
+		detail::uint32 ilog = exp, sign = detail::sign_mask(ilog), m = 
+			(((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 28)>>4))^sign) - sign;
+		if(!m)
+			return half(detail::binary, 0);
+		// Normalize the fixed-point result, accumulating shifted-out bits as sticky.
+		for(exp=14; m<0x8000000 && exp; m<<=1,--exp) ;
+		for(; m>0xFFFFFFF; m>>=1,++exp)
+			s |= m & 1;
+		return half(detail::binary, detail::fixed2half<half::round_style,27,false,false,true>(m, exp, sign&0x8000, s));
+	#endif
+	}
+
+	/// Natural logarithm plus one.
+	/// This function may be 1 ULP off the correctly rounded exact result in <0.05% of inputs for `std::round_to_nearest` 
+	/// and in ~1% of inputs for any other rounding mode.
+	///
+	/// **See also:** Documentation for [std::log1p](https://en.cppreference.com/w/cpp/numeric/math/log1p).
+	/// \param arg function argument
+	/// \return logarithm of \a arg plus 1 to base e
+	/// \exception FE_INVALID for signaling NaN or argument <-1
+	/// \exception FE_DIVBYZERO for -1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half log1p(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		return half(detail::binary, detail::float2half<half::round_style>(std::log1p(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		// Domain: arg == -1 is a -inf pole; arg < -1 (including -inf) is invalid;
+		// signaling NaN signals.
+		if(arg.data_ >= 0xBC00)
+			return half(detail::binary, (arg.data_==0xBC00) ? detail::pole(0x8000) : (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
+		int abs = arg.data_ & 0x7FFF, exp = -15;
+		// log1p(+-0) = +-0 exactly; log1p(+inf) = +inf.
+		if(!abs || abs >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		for(; abs<0x400; abs<<=1,--exp) ;
+		exp += abs >> 10;
+		// Form 1 + arg in fixed point (1.0 == 0x40000000), then take its logarithm.
+		detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 20;
+		if(arg.data_ & 0x8000) {
+			m = 0x40000000 - (m>>-exp);
+			for(exp=0; m<0x40000000; m<<=1,--exp) ;
+		} else {
+			if(exp < 0) {
+				m = 0x40000000 + (m>>-exp);
+				exp = 0;
+			} else {
+				m += 0x40000000 >> exp;
+				int i = m >> 31;	// renormalize if the sum reached 2.0
+				m >>= i;
+				exp += i;
+			}
+		}
+		// Rescale from base 2 to base e via log2_post (same constant as log()).
+		return half(detail::binary, detail::log2_post<half::round_style,0xB8AA3B2A>(detail::log2(m), exp, 17));
+	#endif
+	}
+
+	/// \}
+	/// \anchor power
+	/// \name Power functions
+	/// \{
+
+	/// Square root.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::sqrt](https://en.cppreference.com/w/cpp/numeric/math/sqrt).
+	/// \param arg function argument
+	/// \return square root of \a arg
+	/// \exception FE_INVALID for signaling NaN and negative arguments
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half sqrt(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = 15;
+		// Specials: signaling NaN signals; negative non-zero arguments (>0x8000) are invalid;
+		// +-0, +inf and quiet NaN pass through as-is.
+		if(!abs || arg.data_ >= 0x7C00)
+			return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? detail::invalid() : arg.data_);
+		// Normalize subnormals; then compute an integer square root of the fixed-point mantissa
+		// (detail::sqrt also halves the exponent) and round with the exact remainder r-m.
+		for(; abs<0x400; abs<<=1,--exp) ;
+		detail::uint32 r = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 10, m = detail::sqrt<20>(r, exp+=abs>>10);
+		return half(detail::binary, detail::rounded<half::round_style,false>((exp<<10)+(m&0x3FF), r>m, r!=0));
+	#endif
+	}
+
+	/// Cubic root.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::cbrt](https://en.cppreference.com/w/cpp/numeric/math/cbrt).
+	/// \param arg function argument
+	/// \return cubic root of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half cbrt(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::cbrt(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		// +-0, +-1 (0x3C00), +-inf and quiet NaN are fixed points / pass-throughs; signaling NaN signals.
+		int abs = arg.data_ & 0x7FFF, exp = -15;
+		if(!abs || abs == 0x3C00 || abs >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		for(; abs<0x400; abs<<=1, --exp);
+		// Compute via exp2(log2(|arg|)/3): take a signed fixed-point log2 of |arg| (sign folded in
+		// via the sign mask so negative logs divide correctly)...
+		detail::uint32 ilog = exp + (abs>>10), sign = detail::sign_mask(ilog), f, m = 
+			(((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 24)>>4))^sign) - sign;
+		for(exp=2; m<0x80000000; m<<=1,--exp) ;
+		// ...then divide by 3 via multiplication with 0xAAAAAAAB (fixed-point 2/3 scaled; acts as /3 here).
+		m = detail::multiply64(m, 0xAAAAAAAB);
+		int i = m >> 31, s;
+		exp += i;
+		m <<= 1 - i;
+		// Split log/3 into integer exponent (exp) and fractional part (f) for exp2.
+		if(exp < 0) {
+			f = m >> -exp;
+			exp = 0;
+		} else {
+			f = (m<<exp) & 0x7FFFFFFF;
+			exp = m >> (31-exp);
+		}
+		m = detail::exp2(f, (half::round_style==std::round_to_nearest) ? 29 : 26);
+		// Negative log (|arg|<1): invert the exp2 result, since we computed exp2(|log|/3).
+		if(sign) {
+			if(m > 0x80000000) {
+				m = detail::divide64(0x80000000, m, s);
+				++exp;
+			}
+			exp = -exp;
+		}
+		// Result carries the sign of the argument (cube root is odd).
+		return half(detail::binary, (half::round_style==std::round_to_nearest) ?
+			detail::fixed2half<half::round_style,31,false,false,false>(m, exp+14, arg.data_&0x8000) :
+			detail::fixed2half<half::round_style,23,false,false,false>((m+0x80)>>8, exp+14, arg.data_&0x8000));
+	#endif
+	}
+
+	/// Hypotenuse function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
+	/// \param x first argument
+	/// \param y second argument
+	/// \return square root of sum of squares without internal over- or underflows
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+	inline half hypot(half x, half y) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_);
+		return half(detail::binary, detail::float2half<half::round_style>(std::hypot(fx, fy)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, expx = 0, expy = 0;
+		// Specials: any infinity dominates (even over NaN, per hypot's contract); otherwise NaNs propagate.
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary,	(absx==0x7C00) ? detail::select(0x7C00, y.data_) :
+				(absy==0x7C00) ? detail::select(0x7C00, x.data_) : detail::signal(x.data_, y.data_));
+		// One operand zero: result is |other| (with underflow check on subnormals).
+		if(!absx)
+			return half(detail::binary, absy ? detail::check_underflow(absy) : 0);
+		if(!absy)
+			return half(detail::binary, detail::check_underflow(absx));
+		// Order so absx >= absy, normalize both, then square the 11-bit mantissas exactly in 32 bits.
+		if(absy > absx)
+			std::swap(absx, absy);
+		for(; absx<0x400; absx<<=1,--expx) ;
+		for(; absy<0x400; absy<<=1,--expy) ;
+		detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
+		mx *= mx;
+		my *= my;
+		// Renormalize the 21/22-bit squares to a common fixed-point position and track unbiased exponents.
+		int ix = mx >> 21, iy = my >> 21;
+		expx = 2*(expx+(absx>>10)) - 15 + ix;
+		expy = 2*(expy+(absy>>10)) - 15 + iy;
+		mx <<= 10 - ix;
+		my <<= 10 - iy;
+		// Align the smaller square to the larger one's exponent, keeping a sticky bit for the
+		// shifted-out part so the final square-root rounding stays exact.
+		int d = expx - expy;
+		my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+		return half(detail::binary, detail::hypot_post<half::round_style>(mx+my, expx));
+	#endif
+	}
+
+	/// Hypotenuse function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::hypot](https://en.cppreference.com/w/cpp/numeric/math/hypot).
+	/// \param x first argument
+	/// \param y second argument
+	/// \param z third argument
+	/// \return square root of sum of squares without internal over- or underflows
+	/// \exception FE_INVALID if \a x, \a y or \a z is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding of the final square root
+	inline half hypot(half x, half y, half z) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type; note this computes sqrt(x²+y²+z²)
+		// directly (3-argument std::hypot is C++17-only).
+		detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_), fz = detail::half2float<detail::internal_t>(z.data_);
+		return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(fx*fx+fy*fy+fz*fz)));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, expx = 0, expy = 0, expz = 0;
+		// Any zero operand reduces to the 2-argument case.
+		if(!absx)
+			return hypot(y, z);
+		if(!absy)
+			return hypot(x, z);
+		if(!absz)
+			return hypot(x, y);
+		// Specials: infinity dominates NaN; otherwise propagate/signal NaNs.
+		if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
+			return half(detail::binary,	(absx==0x7C00) ? detail::select(0x7C00, detail::select(y.data_, z.data_)) :
+										(absy==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, z.data_)) :
+										(absz==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, y.data_)) :
+										detail::signal(x.data_, y.data_, z.data_));
+		// Sort so absx >= absy >= absz (3-element sorting network).
+		if(absz > absy)
+			std::swap(absy, absz);
+		if(absy > absx)
+			std::swap(absx, absy);
+		if(absz > absy)
+			std::swap(absy, absz);
+		// Normalize and square each mantissa exactly, as in the 2-argument version.
+		for(; absx<0x400; absx<<=1,--expx) ;
+		for(; absy<0x400; absy<<=1,--expy) ;
+		for(; absz<0x400; absz<<=1,--expz) ;
+		detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400, mz = (absz&0x3FF) | 0x400;
+		mx *= mx;
+		my *= my;
+		mz *= mz;
+		int ix = mx >> 21, iy = my >> 21, iz = mz >> 21;
+		expx = 2*(expx+(absx>>10)) - 15 + ix;
+		expy = 2*(expy+(absy>>10)) - 15 + iy;
+		expz = 2*(expz+(absz>>10)) - 15 + iz;
+		mx <<= 10 - ix;
+		my <<= 10 - iy;
+		mz <<= 10 - iz;
+		// Add the two smaller squares first (z into y), keeping a sticky bit for alignment shifts.
+		int d = expy - expz;
+		mz = (d<30) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+		my += mz;
+		// Handle carry-out of the partial sum; if it now exceeds the largest term, swap roles.
+		if(my & 0x80000000) {
+			my = (my>>1) | (my&1);
+			if(++expy > expx) {
+				std::swap(mx, my);
+				std::swap(expx, expy);
+			}
+		}
+		// Fold the partial sum into the largest square and take the rounded square root.
+		d = expx - expy;
+		my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
+		return half(detail::binary, detail::hypot_post<half::round_style>(mx+my, expx));
+	#endif
+	}
+
+	/// Power function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.00025% of inputs.
+	///
+	/// **See also:** Documentation for [std::pow](https://en.cppreference.com/w/cpp/numeric/math/pow).
+	/// \param x base
+	/// \param y exponent
+	/// \return \a x raised to \a y
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN or if \a x is finite an negative and \a y is finite and not integral
+	/// \exception FE_DIVBYZERO if \a x is 0 and \a y is negative
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half pow(half x, half y) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::pow(detail::half2float<detail::internal_t>(x.data_), detail::half2float<detail::internal_t>(y.data_))));
+	#else
+		int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -15;
+		// pow(x,+-0)==1 and pow(+1,y)==1 for any x/y, even NaN (per IEC 60559 / std::pow).
+		if(!absy || x.data_ == 0x3C00)
+			return half(detail::binary, detail::select(0x3C00, (x.data_==0x3C00) ? y.data_ : x.data_));
+		// is_int: y has no fractional bits; sign: result is negative only for negative x and odd integral y.
+		bool is_int = absy >= 0x6400 || (absy>=0x3C00 && !(absy&((1<<(25-(absy>>10)))-1)));
+		unsigned int sign = x.data_ & (static_cast<unsigned>((absy<0x6800)&&is_int&&((absy>>(25-(absy>>10)))&1))<<15);
+		// Infinities and NaNs: dense encoding of all the std::pow special-case rules.
+		if(absx >= 0x7C00 || absy >= 0x7C00)
+			return half(detail::binary,	(absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
+										(absy==0x7C00) ? ((absx==0x3C00) ? 0x3C00 : (!absx && y.data_==0xFC00) ? detail::pole() :
+										(0x7C00&-((y.data_>>15)^(absx>0x3C00)))) : (sign|(0x7C00&((y.data_>>15)-1U))));
+		// pow(+-0,y): pole (divide-by-zero inf) for negative y, signed zero otherwise.
+		if(!absx)
+			return half(detail::binary, (y.data_&0x8000) ? detail::pole(sign) : sign);
+		// Negative finite base with non-integral exponent is invalid.
+		if((x.data_&0x8000) && !is_int)
+			return half(detail::binary, detail::invalid());
+		if(x.data_ == 0xBC00)
+			return half(detail::binary, sign|0x3C00);
+		// Exact shortcuts: y==0.5 -> sqrt, y==1 -> x (underflow-checked), y==2 -> x*x.
+		if(y.data_ == 0x3800)
+			return sqrt(x);
+		if(y.data_ == 0x3C00)
+			return half(detail::binary, detail::check_underflow(x.data_));
+		if(y.data_ == 0x4000)
+			return x * x;
+		// General case: compute exp2(y*log2(|x|)) in extended fixed point.
+		for(; absx<0x400; absx<<=1,--exp) ;
+		detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m = 
+			(((ilog<<27)+((detail::log2(static_cast<detail::uint32>((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign;
+		for(exp=-11; m<0x80000000; m<<=1,--exp) ;
+		for(; absy<0x400; absy<<=1,--exp) ;
+		// 64-bit multiply of |log2(x)| by |y|'s mantissa.
+		m = detail::multiply64(m, static_cast<detail::uint32>((absy&0x3FF)|0x400)<<21);
+		int i = m >> 31;
+		exp += (absy>>10) + i;
+		m <<= 1 - i;
+		// Split into integer exponent and fractional part for exp2.
+		if(exp < 0) {
+			f = m >> -exp;
+			exp = 0;
+		} else {
+			f = (m<<exp) & 0x7FFFFFFF;
+			exp = m >> (31-exp);
+		}
+		// exp2_post applies the combined sign of log2(x) and y (reciprocal if negative) and rounds.
+		return half(detail::binary, detail::exp2_post<half::round_style,false>(detail::exp2(f), exp, ((msign&1)^(y.data_>>15))!=0, sign));
+	#endif
+	}
+
+	/// \}
+	/// \anchor trigonometric
+	/// \name Trigonometric functions
+	/// \{
+
+	/// Compute sine and cosine simultaneously.
+	///	This returns the same results as sin() and cos() but is faster than calling each function individually.
+	///
+	/// This function is exact to rounding for all rounding modes.
+	/// \param arg function argument
+	/// \param sin variable to take sine of \a arg
+	/// \param cos variable to take cosine of \a arg
+	/// \exception FE_INVALID for signaling NaN or infinity
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline void sincos(half arg, half *sin, half *cos) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote once, compute both with the standard library.
+		detail::internal_t f = detail::half2float<detail::internal_t>(arg.data_);
+		*sin = half(detail::binary, detail::float2half<half::round_style>(std::sin(f)));
+		*cos = half(detail::binary, detail::float2half<half::round_style>(std::cos(f)));
+	#else
+		int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15, k;
+		// Infinity is invalid for sin/cos; signaling NaN signals; quiet NaN propagates.
+		if(abs >= 0x7C00)
+			*sin = *cos = half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+		else if(!abs) {
+			// sin(+-0) = +-0, cos(+-0) = 1.
+			*sin = arg;
+			*cos = half(detail::binary, 0x3C00);
+		} else if(abs < 0x2500) {
+			// Tiny arguments: sin(x) ~= x (rounded down by 1 ulp with sticky), cos(x) ~= 1 (from below).
+			*sin = half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-1, 1, 1));
+			*cos = half(detail::binary, detail::rounded<half::round_style,true>(0x3BFF, 1, 1));
+		} else {
+			// Hard-coded worst cases for directed rounding modes, where the generic fixed-point
+			// precision would otherwise round the wrong way — presumably found by exhaustive search.
+			if(half::round_style != std::round_to_nearest) {
+				switch(abs) {
+				case 0x48B7:
+					*sin = half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x1D07, 1, 1));
+					*cos = half(detail::binary, detail::rounded<half::round_style,true>(0xBBFF, 1, 1));
+					return;
+				case 0x598C:
+					*sin = half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
+					*cos = half(detail::binary, detail::rounded<half::round_style,true>(0x80FC, 1, 1));
+					return;
+				case 0x6A64:
+					*sin = half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x3BFE, 1, 1));
+					*cos = half(detail::binary, detail::rounded<half::round_style,true>(0x27FF, 1, 1));
+					return;
+				case 0x6D8C:
+					*sin = half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x0FE6, 1, 1));
+					*cos = half(detail::binary, detail::rounded<half::round_style,true>(0x3BFF, 1, 1));
+					return;
+				}
+			}
+			// Argument reduction to a quadrant count k, then quadrant correction of (sin, cos).
+			std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
+			switch(k & 3) {
+				case 1: sc = std::make_pair(sc.second, -sc.first); break;
+				case 2: sc = std::make_pair(-sc.first, -sc.second); break;
+				case 3: sc = std::make_pair(-sc.second, sc.first); break;
+			}
+			// Apply the argument's sign to the sine (odd function); cosine is even.
+			*sin = half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>((sc.first^-static_cast<detail::uint32>(sign))+sign));
+			*cos = half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>(sc.second));
+		}
+	#endif
+	}
+
+	/// Sine function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::sin](https://en.cppreference.com/w/cpp/numeric/math/sin).
+	/// \param arg function argument
+	/// \return sine value of \a arg
+	/// \exception FE_INVALID for signaling NaN or infinity
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half sin(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::sin(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, k;
+		// sin(+-0) = +-0; infinity is invalid; NaNs propagate/signal.
+		if(!abs)
+			return arg;
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+		// Tiny arguments: sin(x) ~= x, rounded from just below with a sticky bit.
+		if(abs < 0x2900)
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-1, 1, 1));
+		// Hard-coded worst cases for directed rounding modes (see sincos()).
+		if(half::round_style != std::round_to_nearest)
+			switch(abs) {
+				case 0x48B7: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x1D07, 1, 1));
+				case 0x6A64: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x3BFE, 1, 1));
+				case 0x6D8C: return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x0FE6, 1, 1));
+			}
+		// Reduce to a quadrant k; odd quadrants take the cosine component, and the sign
+		// combines the quadrant's sign with the argument's sign (sine is odd).
+		std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
+		detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)&1)^(arg.data_>>15));
+		return half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>((((k&1) ? sc.second : sc.first)^sign) - sign));
+	#endif
+	}
+
+	/// Cosine function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::cos](https://en.cppreference.com/w/cpp/numeric/math/cos).
+	/// \param arg function argument
+	/// \return cosine value of \a arg
+	/// \exception FE_INVALID for signaling NaN or infinity
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half cos(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::cos(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, k;
+		// cos(+-0) = 1; infinity is invalid; NaNs propagate/signal.
+		if(!abs)
+			return half(detail::binary, 0x3C00);
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+		// Tiny arguments: cos(x) ~= 1, rounded from just below with a sticky bit.
+		if(abs < 0x2500)
+			return half(detail::binary, detail::rounded<half::round_style,true>(0x3BFF, 1, 1));
+		// Hard-coded worst case for directed rounding modes (see sincos()).
+		if(half::round_style != std::round_to_nearest && abs == 0x598C)
+			return half(detail::binary, detail::rounded<half::round_style,true>(0x80FC, 1, 1));
+		// Reduce to a quadrant k; odd quadrants take the sine component, and the quadrant
+		// alone determines the sign (cosine is even, so the argument sign is ignored).
+		std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
+		detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)^k)&1);
+		return half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>((((k&1) ? sc.first : sc.second)^sign) - sign));
+	#endif
+	}
+
+	/// Tangent function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::tan](https://en.cppreference.com/w/cpp/numeric/math/tan).
+	/// \param arg function argument
+	/// \return tangent value of \a arg
+	/// \exception FE_INVALID for signaling NaN or infinity
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half tan(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::tan(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = 13, k;
+		// tan(+-0) = +-0; infinity is invalid; NaNs propagate/signal.
+		if(!abs)
+			return arg;
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+		// Tiny arguments: tan(x) ~= x (from above, sticky set).
+		if(abs < 0x2700)
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_, 0, 1));
+		// Hard-coded worst cases for directed rounding modes (see sincos()).
+		if(half::round_style != std::round_to_nearest)
+			switch(abs) {
+				case 0x658C: return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x07E6, 1, 1));
+				case 0x7330: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x4B62, 1, 1));
+			}
+		// tan = sin/cos: reduce the argument, rotate by a quadrant if k is odd (tan has period pi),
+		// strip both signs, normalize numerator and denominator, then divide/round in tangent_post.
+		std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 30);
+		if(k & 1)
+			sc = std::make_pair(-sc.second, sc.first);
+		detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second);
+		detail::uint32 my = (sc.first^signy) - signy, mx = (sc.second^signx) - signx;
+		for(; my<0x80000000; my<<=1,--exp) ;
+		for(; mx<0x80000000; mx<<=1,++exp) ;
+		return half(detail::binary, detail::tangent_post<half::round_style>(my, mx, exp, (signy^signx^arg.data_)&0x8000));
+	#endif
+	}
+
+	/// Arc sine.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::asin](https://en.cppreference.com/w/cpp/numeric/math/asin).
+	/// \param arg function argument
+	/// \return arc sine value of \a arg
+	/// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half asin(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::asin(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+		// asin(+-0) = +-0.
+		if(!abs)
+			return arg;
+		// |arg| >= 1: exactly +-1 gives +-pi/2 (0x3E48 ~ pi/2); |arg| > 1 is invalid; NaNs signal/propagate.
+		if(abs >= 0x3C00)
+			return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() :
+										detail::rounded<half::round_style,true>(sign|0x3E48, 0, 1));
+		// Tiny arguments: asin(x) ~= x (from above, sticky set).
+		if(abs < 0x2900)
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_, 0, 1));
+		// Hard-coded worst cases for directed rounding modes — presumably from exhaustive search.
+		if(half::round_style != std::round_to_nearest && (abs == 0x2B44 || abs == 0x2DC3))
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_+1, 1, 1));
+		// asin(x) = atan2(x, sqrt(1-x²)): atan2_args yields that (numerator, denominator) pair.
+		std::pair<detail::uint32,detail::uint32> sc = detail::atan2_args(abs);
+		detail::uint32 m = detail::atan2(sc.first, sc.second, (half::round_style==std::round_to_nearest) ? 27 : 26);
+		return half(detail::binary, detail::fixed2half<half::round_style,30,false,true,true>(m, 14, sign));
+	#endif
+	}
+
+	/// Arc cosine function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::acos](https://en.cppreference.com/w/cpp/numeric/math/acos).
+	/// \param arg function argument
+	/// \return arc cosine value of \a arg
+	/// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half acos(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::acos(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15;
+		// acos(+-0) = pi/2 (0x3E48 ~ pi/2 in half).
+		if(!abs)
+			return half(detail::binary, detail::rounded<half::round_style,true>(0x3E48, 0, 1));
+		// |arg| >= 1: acos(1)=0, acos(-1)=pi (0x4248 ~ pi); |arg|>1 invalid; NaNs signal/propagate.
+		if(abs >= 0x3C00)
+			return half(detail::binary,	(abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() :
+										sign ? detail::rounded<half::round_style,true>(0x4248, 0, 1) : 0);
+		// acos(x) = atan2(sqrt(1-x²), x); for negative x reflect about pi/2
+		// (0xC90FDAA2 is presumably pi in the fixed-point scale used here — TODO confirm).
+		std::pair<detail::uint32,detail::uint32> cs = detail::atan2_args(abs);
+		detail::uint32 m = detail::atan2(cs.second, cs.first, 28);
+		return half(detail::binary, detail::fixed2half<half::round_style,31,false,true,true>(sign ? (0xC90FDAA2-m) : m, 15, 0, sign));
+	#endif
+	}
+
+	/// Arc tangent function.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::atan](https://en.cppreference.com/w/cpp/numeric/math/atan).
+	/// \param arg function argument
+	/// \return arc tangent value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half atan(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::atan(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+		// atan(+-0) = +-0; atan(+-inf) = +-pi/2 (0x3E48); NaNs signal/propagate.
+		if(!abs)
+			return arg;
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? detail::rounded<half::round_style,true>(sign|0x3E48, 0, 1) : detail::signal(arg.data_));
+		// Tiny arguments: atan(x) ~= x (rounded from just below with sticky).
+		if(abs <= 0x2700)
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-1, 1, 1));
+		// Compute atan(x) = atan2(m, 2^k) in fixed point; the split on exp>15 keeps the
+		// denominator shift within range for very large arguments.
+		int exp = (abs>>10) + (abs<=0x3FF);
+		detail::uint32 my = (abs&0x3FF) | ((abs>0x3FF)<<10);
+		detail::uint32 m = (exp>15) ?	detail::atan2(my<<19, 0x20000000>>(exp-15), (half::round_style==std::round_to_nearest) ? 26 : 24) :
+										detail::atan2(my<<(exp+4), 0x20000000, (half::round_style==std::round_to_nearest) ? 30 : 28);
+		return half(detail::binary, detail::fixed2half<half::round_style,30,false,true,true>(m, 14, sign));
+	#endif
+	}
+
+	/// Arc tangent function.
+	/// This function may be 1 ULP off the correctly rounded exact result in ~0.005% of inputs for `std::round_to_nearest`, 
+	/// in ~0.1% of inputs for `std::round_toward_zero` and in ~0.02% of inputs for any other rounding mode.
+	///
+	/// **See also:** Documentation for [std::atan2](https://en.cppreference.com/w/cpp/numeric/math/atan2).
+	/// \param y numerator
+	/// \param x denominator
+	/// \return arc tangent value
+	/// \exception FE_INVALID if \a x or \a y is signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half atan2(half y, half x) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::atan2(detail::half2float<detail::internal_t>(y.data_), detail::half2float<detail::internal_t>(x.data_))));
+	#else
+		unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, signx = x.data_ >> 15, signy = y.data_ & 0x8000;
+		// Specials: the half-word constants are rounded pi fractions — 0x3E48 ~ pi/2,
+		// 0x4248 ~ pi, 0x40B6 ~ 3pi/4, 0x3A48 ~ pi/4 — matching std::atan2's quadrant rules.
+		if(absx >= 0x7C00 || absy >= 0x7C00) {
+			if(absx > 0x7C00 || absy > 0x7C00)
+				return half(detail::binary, detail::signal(x.data_, y.data_));
+			if(absy == 0x7C00)
+				return half(detail::binary, (absx<0x7C00) ?	detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1) :
+													signx ?	detail::rounded<half::round_style,true>(signy|0x40B6, 0, 1) :
+															detail::rounded<half::round_style,true>(signy|0x3A48, 0, 1));
+			return (x.data_==0x7C00) ? half(detail::binary, signy) : half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1));
+		}
+		// y == +-0: +-0 for positive x, +-pi for negative x.
+		if(!absy)
+			return signx ? half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1)) : y;
+		// x == +-0: +-pi/2.
+		if(!absx)
+			return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1));
+		// d = exponent difference of |y| and |x|; extreme ratios saturate to +-pi/2 or +-pi
+		// without running the fixed-point CORDIC-style division.
+		int d = (absy>>10) + (absy<=0x3FF) - (absx>>10) - (absx<=0x3FF);
+		if(d > (signx ? 18 : 12))
+			return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1));
+		if(signx && d < -11)
+			return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1));
+		// Very small |y/x| with positive x: atan2 ~= y/x, computed as an exact fixed-point
+		// division with underflow handling instead of the full atan2 kernel.
+		if(!signx && d < ((half::round_style==std::round_toward_zero) ? -15 : -9)) {
+			for(; absy<0x400; absy<<=1,--d) ;
+			detail::uint32 mx = ((absx<<1)&0x7FF) | 0x800, my = ((absy<<1)&0x7FF) | 0x800;
+			int i = my < mx;
+			d -= i;
+			if(d < -25)
+				return half(detail::binary, detail::underflow<half::round_style>(signy));
+			my <<= 11 + i;
+			return half(detail::binary, detail::fixed2half<half::round_style,11,false,false,true>(my/mx, d+14, signy, my%mx!=0));
+		}
+		// General case: align both mantissas by d and evaluate atan2 in fixed point;
+		// for negative x reflect about pi (0xC90FDAA2, presumably pi in this fixed-point scale).
+		detail::uint32 m = detail::atan2(	((absy&0x3FF)|((absy>0x3FF)<<10))<<(19+((d<0) ? d : (d>0) ? 0 : -1)),
+											((absx&0x3FF)|((absx>0x3FF)<<10))<<(19-((d>0) ? d : (d<0) ? 0 : 1)));
+		return half(detail::binary, detail::fixed2half<half::round_style,31,false,true,true>(signx ? (0xC90FDAA2-m) : m, 15, signy, signx));
+	#endif
+	}
+
+	/// \}
+	/// \anchor hyperbolic
+	/// \name Hyperbolic functions
+	/// \{
+
+	/// Hyperbolic sine.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::sinh](https://en.cppreference.com/w/cpp/numeric/math/sinh).
+	/// \param arg function argument
+	/// \return hyperbolic sine value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half sinh(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::sinh(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp;
+		// +-0, +-inf and quiet NaN pass through; signaling NaN signals.
+		if(!abs || abs >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		// Tiny arguments: sinh(x) ~= x (from above, sticky set).
+		if(abs <= 0x2900)
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_, 0, 1));
+		// sinh(x) = (e^x - e^-x)/2: hyperbolic_args returns the two exponential mantissas.
+		std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 29 : 27);
+		detail::uint32 m = mm.first - mm.second;
+		for(exp+=13; m<0x80000000 && exp; m<<=1,--exp) ;
+		unsigned int sign = arg.data_ & 0x8000;
+		// Overflow to signed infinity per rounding mode; otherwise round the normalized fixed-point result.
+		if(exp > 29)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		return half(detail::binary, detail::fixed2half<half::round_style,31,false,false,true>(m, exp, sign));
+	#endif
+	}
+
+	/// Hyperbolic cosine.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::cosh](https://en.cppreference.com/w/cpp/numeric/math/cosh).
+	/// \param arg function argument
+	/// \return hyperbolic cosine value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half cosh(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::cosh(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp;
+		// cosh(+-0) = 1; cosh(+-inf) = +inf; NaNs propagate/signal.
+		if(!abs)
+			return half(detail::binary, 0x3C00);
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : 0x7C00);
+		// cosh(x) = (e^x + e^-x)/2: sum the exponential mantissas, absorb a possible
+		// carry-out (i) and force the implicit leading bit before converting.
+		std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 23 : 26);
+		detail::uint32 m = mm.first + mm.second, i = (~m&0xFFFFFFFF) >> 31;
+		m = (m>>i) | (m&i) | 0x80000000;
+		if((exp+=13+i) > 29)
+			return half(detail::binary, detail::overflow<half::round_style>());
+		return half(detail::binary, detail::fixed2half<half::round_style,31,false,false,true>(m, exp));
+	#endif
+	}
+
+	/// Hyperbolic tangent.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::tanh](https://en.cppreference.com/w/cpp/numeric/math/tanh).
+	/// \param arg function argument
+	/// \return hyperbolic tangent value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half tanh(half arg) {
+	#ifdef HALF_ARITHMETIC_TYPE
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::tanh(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp;
+		// tanh(+-0) = +-0; tanh(+-inf) = +-1 (0x7C00-0x4000 == 0x3C00); NaNs propagate/signal.
+		if(!abs)
+			return arg;
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_-0x4000));
+		// Saturation: for |x| >= ~5 the result is +-1 to half precision (rounded from below).
+		if(abs >= 0x4500)
+			return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
+		// Tiny arguments: tanh(x) ~= x (rounded from just below with sticky).
+		if(abs < 0x2700)
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-1, 1, 1));
+		// Hard-coded worst case for directed rounding modes — presumably from exhaustive search.
+		if(half::round_style != std::round_to_nearest && abs == 0x2D3F)
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-3, 0, 1));
+		// tanh(x) = (e^x - e^-x)/(e^x + e^-x): form both sums, normalize, and divide in tangent_post.
+		std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, 27);
+		detail::uint32 my = mm.first - mm.second - (half::round_style!=std::round_to_nearest), mx = mm.first + mm.second, i = (~mx&0xFFFFFFFF) >> 31;
+		for(exp=13; my<0x80000000; my<<=1,--exp) ;
+		mx = (mx>>i) | 0x80000000;
+		return half(detail::binary, detail::tangent_post<half::round_style>(my, mx, exp-i, arg.data_&0x8000));
+	#endif
+	}
+
+	/// Hyperbolic area sine.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::asinh](https://en.cppreference.com/w/cpp/numeric/math/asinh).
+	/// \param arg function argument
+	/// \return area sine value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half asinh(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::asinh(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF;
+		// +-0, +-inf and quiet NaN pass through; signaling NaN signals.
+		if(!abs || abs >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		// Tiny arguments: asinh(x) ~= x (rounded from just below with sticky).
+		if(abs <= 0x2900)
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-1, 1, 1));
+		// Hard-coded worst cases for directed rounding modes — presumably from exhaustive search;
+		// the offsets give the exactly representable neighbour to round from.
+		if(half::round_style != std::round_to_nearest)
+			switch(abs)
+			{
+				case 0x32D4: return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-13, 1, 1));
+				case 0x3B5B: return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-197, 1, 1));
+			}
+		// General case: detail::area computes log(x + sqrt(x²+1)) (the `true` presumably selects asinh).
+		return half(detail::binary, detail::area<half::round_style,true>(arg.data_));
+	#endif
+	}
+
+	/// Hyperbolic area cosine.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::acosh](https://en.cppreference.com/w/cpp/numeric/math/acosh).
+	/// \param arg function argument
+	/// \return area cosine value of \a arg
+	/// \exception FE_INVALID for signaling NaN or arguments <1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half acosh(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		// Fast path: promote to the configured arithmetic type and defer to the standard library.
+		return half(detail::binary, detail::float2half<half::round_style>(std::acosh(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF;
+		// Domain is [1, +inf): anything negative or below 1 is invalid, except signaling NaN which signals.
+		if((arg.data_&0x8000) || abs < 0x3C00)
+			return half(detail::binary, (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+		// acosh(1) = 0 exactly.
+		if(abs == 0x3C00)
+			return half(detail::binary, 0);
+		// +inf and quiet NaN pass through; signaling NaN signals.
+		if(arg.data_ >= 0x7C00)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		// General case: detail::area computes log(x + sqrt(x²-1)) (the `false` presumably selects acosh).
+		return half(detail::binary, detail::area<half::round_style,false>(arg.data_));
+	#endif
+	}
+
+	/// Hyperbolic area tangent.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::atanh](https://en.cppreference.com/w/cpp/numeric/math/atanh).
+	/// \param arg function argument
+	/// \return area tangent value of \a arg
+	/// \exception FE_INVALID for signaling NaN or if abs(\a arg) > 1
+	/// \exception FE_DIVBYZERO for +/-1
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half atanh(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		return half(detail::binary, detail::float2half<half::round_style>(std::atanh(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF, exp = 0;
+		if(!abs)
+			return arg;
+		if(abs >= 0x3C00)
+			return half(detail::binary, (abs==0x3C00) ? detail::pole(arg.data_&0x8000) : (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_));
+		if(abs < 0x2700)
+			return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_, 0, 1));
+		detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << ((abs>>10)+(abs<=0x3FF)+6), my = 0x80000000 + m, mx = 0x80000000 - m;
+		for(; mx<0x80000000; mx<<=1,++exp) ;
+		int i = my >= mx, s;
+		return half(detail::binary, detail::log2_post<half::round_style,0xB8AA3B2A>(detail::log2(
+			(detail::divide64(my>>i, mx, s)+1)>>1, 27)+0x10, exp+i-1, 16, arg.data_&0x8000));
+	#endif
+	}
+
+	/// \}
+	/// \anchor special
+	/// \name Error and gamma functions
+	/// \{
+
+	/// Error function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs.
+	///
+	/// **See also:** Documentation for [std::erf](https://en.cppreference.com/w/cpp/numeric/math/erf).
+	/// \param arg function argument
+	/// \return error function value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half erf(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		return half(detail::binary, detail::float2half<half::round_style>(std::erf(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF;
+		if(!abs || abs >= 0x7C00)
+			return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (arg.data_-0x4000) : detail::signal(arg.data_)) : arg;
+		if(abs >= 0x4200)
+			return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
+		return half(detail::binary, detail::erf<half::round_style,false>(arg.data_));
+	#endif
+	}
+
+	/// Complementary error function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.5% of inputs.
+	///
+	/// **See also:** Documentation for [std::erfc](https://en.cppreference.com/w/cpp/numeric/math/erfc).
+	/// \param arg function argument
+	/// \return 1 minus error function value of \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half erfc(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		return half(detail::binary, detail::float2half<half::round_style>(std::erfc(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+		if(abs >= 0x7C00)
+			return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (sign>>1) : detail::signal(arg.data_)) : arg;
+		if(!abs)
+			return half(detail::binary, 0x3C00);
+		if(abs >= 0x4400)
+			return half(detail::binary, detail::rounded<half::round_style,true>((sign>>1)-(sign>>15), sign>>15, 1));
+		return half(detail::binary, detail::erf<half::round_style,true>(arg.data_));
+	#endif
+	}
+
+	/// Natural logarithm of gamma function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in ~0.025% of inputs.
+	///
+	/// **See also:** Documentation for [std::lgamma](https://en.cppreference.com/w/cpp/numeric/math/lgamma).
+	/// \param arg function argument
+	/// \return natural logarithm of gamma function for \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_DIVBYZERO for 0 or negative integer arguments
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half lgamma(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		return half(detail::binary, detail::float2half<half::round_style>(std::lgamma(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		int abs = arg.data_ & 0x7FFF;
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_));
+		if(!abs || arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1))))
+			return half(detail::binary, detail::pole());
+		if(arg.data_ == 0x3C00 || arg.data_ == 0x4000)
+			return half(detail::binary, 0);
+		return half(detail::binary, detail::gamma<half::round_style,true>(arg.data_));
+	#endif
+	}
+
+	/// Gamma function.
+	/// This function may be 1 ULP off the correctly rounded exact result for any rounding mode in <0.25% of inputs.
+	///
+	/// **See also:** Documentation for [std::tgamma](https://en.cppreference.com/w/cpp/numeric/math/tgamma).
+	/// \param arg function argument
+	/// \return gamma function value of \a arg
+	/// \exception FE_INVALID for signaling NaN, negative infinity or negative integer arguments
+	/// \exception FE_DIVBYZERO for 0
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half tgamma(half arg) {
+	#if defined(HALF_ARITHMETIC_TYPE)
+		return half(detail::binary, detail::float2half<half::round_style>(std::tgamma(detail::half2float<detail::internal_t>(arg.data_))));
+	#else
+		unsigned int abs = arg.data_ & 0x7FFF;
+		if(!abs)
+			return half(detail::binary, detail::pole(arg.data_));
+		if(abs >= 0x7C00)
+			return (arg.data_==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
+		if(arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1))))
+			return half(detail::binary, detail::invalid());
+		if(arg.data_ >= 0xCA80)
+			return half(detail::binary, detail::underflow<half::round_style>((1-((abs>>(25-(abs>>10)))&1))<<15));
+		if(arg.data_ <= 0x100 || (arg.data_ >= 0x4900 && arg.data_ < 0x8000))
+			return half(detail::binary, detail::overflow<half::round_style>());
+		if(arg.data_ == 0x3C00)
+			return arg;
+		return half(detail::binary, detail::gamma<half::round_style,false>(arg.data_));
+	#endif
+	}
+
+	/// \}
+	/// \anchor rounding
+	/// \name Rounding
+	/// \{
+
+	/// Nearest integer not less than half value.
+	/// **See also:** Documentation for [std::ceil](https://en.cppreference.com/w/cpp/numeric/math/ceil).
+	/// \param arg half to round
+	/// \return nearest integer not less than \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT if value had to be rounded
+	inline half ceil(half arg) { return half(detail::binary, detail::integral<std::round_toward_infinity,true,true>(arg.data_)); }
+
+	/// Nearest integer not greater than half value.
+	/// **See also:** Documentation for [std::floor](https://en.cppreference.com/w/cpp/numeric/math/floor).
+	/// \param arg half to round
+	/// \return nearest integer not greater than \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT if value had to be rounded
+	inline half floor(half arg) { return half(detail::binary, detail::integral<std::round_toward_neg_infinity,true,true>(arg.data_)); }
+
+	/// Nearest integer not greater in magnitude than half value.
+	/// **See also:** Documentation for [std::trunc](https://en.cppreference.com/w/cpp/numeric/math/trunc).
+	/// \param arg half to round
+	/// \return nearest integer not greater in magnitude than \a arg
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT if value had to be rounded
+	inline half trunc(half arg) { return half(detail::binary, detail::integral<std::round_toward_zero,true,true>(arg.data_)); }
+
+	/// Nearest integer.
+	/// **See also:** Documentation for [std::round](https://en.cppreference.com/w/cpp/numeric/math/round).
+	/// \param arg half to round
+	/// \return nearest integer, rounded away from zero in half-way cases
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT if value had to be rounded
+	inline half round(half arg) { return half(detail::binary, detail::integral<std::round_to_nearest,false,true>(arg.data_)); }
+
+	/// Nearest integer.
+	/// **See also:** Documentation for [std::lround](https://en.cppreference.com/w/cpp/numeric/math/round).
+	/// \param arg half to round
+	/// \return nearest integer, rounded away from zero in half-way cases
+	/// \exception FE_INVALID if value is not representable as `long`
+	inline long lround(half arg) { return detail::half2int<std::round_to_nearest,false,false,long>(arg.data_); }
+
+	/// Nearest integer using half's internal rounding mode.
+	/// **See also:** Documentation for [std::rint](https://en.cppreference.com/w/cpp/numeric/math/rint).
+	/// \param arg half expression to round
+	/// \return nearest integer using default rounding mode
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_INEXACT if value had to be rounded
+	inline half rint(half arg) { return half(detail::binary, detail::integral<half::round_style,true,true>(arg.data_)); }
+
+	/// Nearest integer using half's internal rounding mode.
+	/// **See also:** Documentation for [std::lrint](https://en.cppreference.com/w/cpp/numeric/math/rint).
+	/// \param arg half expression to round
+	/// \return nearest integer using default rounding mode
+	/// \exception FE_INVALID if value is not representable as `long`
+	/// \exception FE_INEXACT if value had to be rounded
+	inline long lrint(half arg) { return detail::half2int<half::round_style,true,true,long>(arg.data_); }
+
+	/// Nearest integer using half's internal rounding mode.
+	/// **See also:** Documentation for [std::nearbyint](https://en.cppreference.com/w/cpp/numeric/math/nearbyint).
+	/// \param arg half expression to round
+	/// \return nearest integer using default rounding mode
+	/// \exception FE_INVALID for signaling NaN
+	inline half nearbyint(half arg) { return half(detail::binary, detail::integral<half::round_style,true,false>(arg.data_)); }
+	/// Nearest integer.
+	/// **See also:** Documentation for [std::llround](https://en.cppreference.com/w/cpp/numeric/math/round).
+	/// \param arg half to round
+	/// \return nearest integer, rounded away from zero in half-way cases
+	/// \exception FE_INVALID if value is not representable as `long long`
+	inline long long llround(half arg) { return detail::half2int<std::round_to_nearest,false,false,long long>(arg.data_); }
+
+	/// Nearest integer using half's internal rounding mode.
+	/// **See also:** Documentation for [std::llrint](https://en.cppreference.com/w/cpp/numeric/math/rint).
+	/// \param arg half expression to round
+	/// \return nearest integer using default rounding mode
+	/// \exception FE_INVALID if value is not representable as `long long`
+	/// \exception FE_INEXACT if value had to be rounded
+	inline long long llrint(half arg) { return detail::half2int<half::round_style,true,true,long long>(arg.data_); }
+
+	/// \}
+	/// \anchor float
+	/// \name Floating point manipulation
+	/// \{
+
+	/// Decompress floating-point number.
+	/// **See also:** Documentation for [std::frexp](https://en.cppreference.com/w/cpp/numeric/math/frexp).
+	/// \param arg number to decompress
+	/// \param exp address to store exponent at
+	/// \return significand in range [0.5, 1)
+	/// \exception FE_INVALID for signaling NaN
+	inline half frexp(half arg, int *exp) {
+		*exp = 0;
+		unsigned int abs = arg.data_ & 0x7FFF;
+		if(abs >= 0x7C00 || !abs)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		for(; abs<0x400; abs<<=1,--*exp) ;
+		*exp += (abs>>10) - 14;
+		return half(detail::binary, (arg.data_&0x8000)|0x3800|(abs&0x3FF));
+	}
+
+	/// Multiply by power of two.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::scalbln](https://en.cppreference.com/w/cpp/numeric/math/scalbn).
+	/// \param arg number to modify
+	/// \param exp power of two to multiply with
+	/// \return \a arg multiplied by 2 raised to \a exp
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half scalbln(half arg, long exp) {
+		unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
+		if(abs >= 0x7C00 || !abs)
+			return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
+		for(; abs<0x400; abs<<=1,--exp) ;
+		exp += abs >> 10;
+		if(exp > 30)
+			return half(detail::binary, detail::overflow<half::round_style>(sign));
+		else if(exp < -10)
+			return half(detail::binary, detail::underflow<half::round_style>(sign));
+		else if(exp > 0)
+			return half(detail::binary, sign|(exp<<10)|(abs&0x3FF));
+		unsigned int m = (abs&0x3FF) | 0x400;
+		return half(detail::binary, detail::rounded<half::round_style,false>(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0));
+	}
+
+	/// Multiply by power of two.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::scalbn](https://en.cppreference.com/w/cpp/numeric/math/scalbn).
+	/// \param arg number to modify
+	/// \param exp power of two to multiply with
+	/// \return \a arg multiplied by 2 raised to \a exp
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half scalbn(half arg, int exp) { return scalbln(arg, exp); }
+
+	/// Multiply by power of two.
+	/// This function is exact to rounding for all rounding modes.
+	///
+	/// **See also:** Documentation for [std::ldexp](https://en.cppreference.com/w/cpp/numeric/math/ldexp).
+	/// \param arg number to modify
+	/// \param exp power of two to multiply with
+	/// \return \a arg multiplied by 2 raised to \a exp
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	inline half ldexp(half arg, int exp) { return scalbln(arg, exp); }
+
+	/// Extract integer and fractional parts.
+	/// **See also:** Documentation for [std::modf](https://en.cppreference.com/w/cpp/numeric/math/modf).
+	/// \param arg number to decompress
+	/// \param iptr address to store integer part at
+	/// \return fractional part
+	/// \exception FE_INVALID for signaling NaN
+	inline half modf(half arg, half *iptr) {
+		unsigned int abs = arg.data_ & 0x7FFF;
+		if(abs > 0x7C00) {
+			arg = half(detail::binary, detail::signal(arg.data_));
+			return *iptr = arg, arg;
+		}
+		if(abs >= 0x6400)
+			return *iptr = arg, half(detail::binary, arg.data_&0x8000);
+		if(abs < 0x3C00)
+			return iptr->data_ = arg.data_ & 0x8000, arg;
+		unsigned int exp = abs >> 10, mask = (1<<(25-exp)) - 1, m = arg.data_ & mask;
+		iptr->data_ = arg.data_ & ~mask;
+		if(!m)
+			return half(detail::binary, arg.data_&0x8000);
+		for(; m<0x400; m<<=1,--exp) ;
+		return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF));
+	}
+
+	/// Extract exponent.
+	/// **See also:** Documentation for [std::ilogb](https://en.cppreference.com/w/cpp/numeric/math/ilogb).
+	/// \param arg number to query
+	/// \return floating-point exponent
+	/// \retval FP_ILOGB0 for zero
+	/// \retval FP_ILOGBNAN for NaN
+	/// \retval INT_MAX for infinity
+	/// \exception FE_INVALID for 0 or infinite values
+	inline int ilogb(half arg) {
+		int abs = arg.data_ & 0x7FFF, exp;
+		if(!abs || abs >= 0x7C00) {
+			detail::raise(FE_INVALID);
+			return !abs ? FP_ILOGB0 : (abs==0x7C00) ? INT_MAX : FP_ILOGBNAN;
+		}
+		for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ;
+		return exp;
+	}
+
+	/// Extract exponent.
+	/// **See also:** Documentation for [std::logb](https://en.cppreference.com/w/cpp/numeric/math/logb).
+	/// \param arg number to query
+	/// \return floating-point exponent
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_DIVBYZERO for 0
+	inline half logb(half arg) {
+		int abs = arg.data_ & 0x7FFF, exp;
+		if(!abs)
+			return half(detail::binary, detail::pole(0x8000));
+		if(abs >= 0x7C00)
+			return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_));
+		for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ;
+		unsigned int value = static_cast<unsigned>(exp<0) << 15;
+		if(exp) {
+			unsigned int m = std::abs(exp) << 6;
+			for(exp=18; m<0x400; m<<=1,--exp) ;
+			value |= (exp<<10) + m;
+		}
+		return half(detail::binary, value);
+	}
+
+	/// Next representable value.
+	/// **See also:** Documentation for [std::nextafter](https://en.cppreference.com/w/cpp/numeric/math/nextafter).
+	/// \param from value to compute next representable value for
+	/// \param to direction towards which to compute next value
+	/// \return next representable value after \a from in direction towards \a to
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW for infinite result from finite argument
+	/// \exception FE_UNDERFLOW for subnormal result
+	inline half nextafter(half from, half to) {
+		int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
+		if(fabs > 0x7C00 || tabs > 0x7C00)
+			return half(detail::binary, detail::signal(from.data_, to.data_));
+		if(from.data_ == to.data_ || !(fabs|tabs))
+			return to;
+		if(!fabs) {
+			detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT);
+			return half(detail::binary, (to.data_&0x8000)+1);
+		}
+		unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(
+			(from.data_^(0x8000|(0x8000-(from.data_>>15))))<(to.data_^(0x8000|(0x8000-(to.data_>>15))))))<<1) - 1;
+		detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00);
+		detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7C00)<0x400);
+		return half(detail::binary, out);
+	}
+
+	/// Next representable value.
+	/// **See also:** Documentation for [std::nexttoward](https://en.cppreference.com/w/cpp/numeric/math/nexttoward).
+	/// \param from value to compute next representable value for
+	/// \param to direction towards which to compute next value
+	/// \return next representable value after \a from in direction towards \a to
+	/// \exception FE_INVALID for signaling NaN
+	/// \exception FE_OVERFLOW for infinite result from finite argument
+	/// \exception FE_UNDERFLOW for subnormal result
+	inline half nexttoward(half from, long double to) {
+		int fabs = from.data_ & 0x7FFF;
+		if(fabs > 0x7C00)
+			return half(detail::binary, detail::signal(from.data_));
+		long double lfrom = static_cast<long double>(from);
+		if(detail::builtin_isnan(to) || lfrom == to)
+			return half(static_cast<float>(to));
+		if(!fabs) {
+			detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT);
+			return half(detail::binary, (static_cast<unsigned>(detail::builtin_signbit(to))<<15)+1);
+		}
+		unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(lfrom<to))<<1) - 1;
+		detail::raise(FE_OVERFLOW, (out&0x7FFF)==0x7C00);
+		detail::raise(FE_UNDERFLOW, !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT && (out&0x7FFF)<0x400);
+		return half(detail::binary, out);
+	}
+
+	/// Take sign.
+	/// **See also:** Documentation for [std::copysign](https://en.cppreference.com/w/cpp/numeric/math/copysign).
+	/// \param x value to change sign for
+	/// \param y value to take sign from
+	/// \return value equal to \a x in magnitude and to \a y in sign
+	inline constexpr half copysign(half x, half y) { return half(detail::binary, x.data_^((x.data_^y.data_)&0x8000)); }
+
+	/// \}
+	/// \anchor classification
+	/// \name Floating point classification
+	/// \{
+
+	/// Classify floating-point value.
+	/// **See also:** Documentation for [std::fpclassify](https://en.cppreference.com/w/cpp/numeric/math/fpclassify).
+	/// \param arg number to classify
+	/// \retval FP_ZERO for positive and negative zero
+	/// \retval FP_SUBNORMAL for subnormal numbers
+	/// \retval FP_INFINITE for positive and negative infinity
+	/// \retval FP_NAN for NaNs
+	/// \retval FP_NORMAL for all other (normal) values
+	inline constexpr int fpclassify(half arg) {
+		return	!(arg.data_&0x7FFF) ? FP_ZERO :
+				((arg.data_&0x7FFF)<0x400) ? FP_SUBNORMAL :
+				((arg.data_&0x7FFF)<0x7C00) ? FP_NORMAL :
+				((arg.data_&0x7FFF)==0x7C00) ? FP_INFINITE :
+				FP_NAN;
+	}
+
+	/// Check if finite number.
+	/// **See also:** Documentation for [std::isfinite](https://en.cppreference.com/w/cpp/numeric/math/isfinite).
+	/// \param arg number to check
+	/// \retval true if neither infinity nor NaN
+	/// \retval false else
+	inline constexpr bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; }
+
+	/// Check for infinity.
+	/// **See also:** Documentation for [std::isinf](https://en.cppreference.com/w/cpp/numeric/math/isinf).
+	/// \param arg number to check
+	/// \retval true for positive or negative infinity
+	/// \retval false else
+	inline constexpr bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; }
+
+	/// Check for NaN.
+	/// **See also:** Documentation for [std::isnan](https://en.cppreference.com/w/cpp/numeric/math/isnan).
+	/// \param arg number to check
+	/// \retval true for NaNs
+	/// \retval false else
+	inline constexpr bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; }
+
+	/// Check if normal number.
+	/// **See also:** Documentation for [std::isnormal](https://en.cppreference.com/w/cpp/numeric/math/isnormal).
+	/// \param arg number to check
+	/// \retval true if normal number
+	/// \retval false if either subnormal, zero, infinity or NaN
+	inline constexpr bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); }
+
+	/// Check sign.
+	/// **See also:** Documentation for [std::signbit](https://en.cppreference.com/w/cpp/numeric/math/signbit).
+	/// \param arg number to check
+	/// \retval true for negative number
+	/// \retval false for positive number
+	inline constexpr bool signbit(half arg) { return (arg.data_&0x8000) != 0; }
+
+	/// \}
+	/// \anchor compfunc
+	/// \name Comparison
+	/// \{
+
+	/// Quiet comparison for greater than.
+	/// **See also:** Documentation for [std::isgreater](https://en.cppreference.com/w/cpp/numeric/math/isgreater).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x greater than \a y
+	/// \retval false else
+	inline constexpr bool isgreater(half x, half y) {
+		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
+	}
+
+	/// Quiet comparison for greater equal.
+	/// **See also:** Documentation for [std::isgreaterequal](https://en.cppreference.com/w/cpp/numeric/math/isgreaterequal).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x greater equal \a y
+	/// \retval false else
+	inline constexpr bool isgreaterequal(half x, half y) {
+		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
+	}
+
+	/// Quiet comparison for less than.
+	/// **See also:** Documentation for [std::isless](https://en.cppreference.com/w/cpp/numeric/math/isless).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x less than \a y
+	/// \retval false else
+	inline constexpr bool isless(half x, half y) {
+		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
+	}
+
+	/// Quiet comparison for less equal.
+	/// **See also:** Documentation for [std::islessequal](https://en.cppreference.com/w/cpp/numeric/math/islessequal).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if \a x less equal \a y
+	/// \retval false else
+	inline constexpr bool islessequal(half x, half y) {
+		return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
+	}
+
+	/// Quiet comparison for less or greater.
+	/// **See also:** Documentation for [std::islessgreater](https://en.cppreference.com/w/cpp/numeric/math/islessgreater).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if either less or greater
+	/// \retval false else
+	inline constexpr bool islessgreater(half x, half y) {
+		return x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF) && !isnan(x) && !isnan(y);
+	}
+
+	/// Quiet check if unordered.
+	/// **See also:** Documentation for [std::isunordered](https://en.cppreference.com/w/cpp/numeric/math/isunordered).
+	/// \param x first operand
+	/// \param y second operand
+	/// \retval true if unordered (one or two NaN operands)
+	/// \retval false else
+	inline constexpr bool isunordered(half x, half y) { return isnan(x) || isnan(y); }
+
+	/// \}
+	/// \anchor casting
+	/// \name Casting
+	/// \{
+
+	/// Cast to or from half-precision floating-point number.
+	/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted 
+	/// directly using the default rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
+	///
+	/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types 
+	/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler 
+	/// error and casting between [half](\ref half_float::half)s returns the argument unmodified.
+	/// \tparam T destination type (half or built-in arithmetic type)
+	/// \tparam U source type (half or built-in arithmetic type)
+	/// \param arg value to cast
+	/// \return \a arg converted to destination type
+	/// \exception FE_INVALID if \a T is integer type and result is not representable as \a T
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	template<class T,class U> T half_cast(U arg) { return detail::half_caster<T,U>::cast(arg); }
+
+	/// Cast to or from half-precision floating-point number.
+	/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted 
+	/// directly using the specified rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
+	///
+	/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types 
+	/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler 
+	/// error and casting between [half](\ref half_float::half)s returns the argument unmodified.
+	/// \tparam T destination type (half or built-in arithmetic type)
+	/// \tparam R rounding mode to use.
+	/// \tparam U source type (half or built-in arithmetic type)
+	/// \param arg value to cast
+	/// \return \a arg converted to destination type
+	/// \exception FE_INVALID if \a T is integer type and result is not representable as \a T
+	/// \exception FE_OVERFLOW, ...UNDERFLOW, ...INEXACT according to rounding
+	template<class T,std::float_round_style R,class U> T half_cast(U arg) { return detail::half_caster<T,U,R>::cast(arg); }
+	/// \}
+
+	/// \}
+	/// \anchor errors
+	/// \name Error handling
+	/// \{
+
+	/// Clear exception flags.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	///
+	/// **See also:** Documentation for [std::feclearexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feclearexcept).
+	/// \param excepts OR of exceptions to clear
+	/// \retval 0 all selected flags cleared successfully
+	inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; }
+
+	/// Test exception flags.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	///
+	/// **See also:** Documentation for [std::fetestexcept](https://en.cppreference.com/w/cpp/numeric/fenv/fetestexcept).
+	/// \param excepts OR of exceptions to test
+	/// \return OR of selected exceptions if raised
+	inline int fetestexcept(int excepts) { return detail::errflags() & excepts; }
+
+	/// Raise exception flags.
+	/// This raises the specified floating point exceptions and also invokes any additional automatic exception handling as 
+	/// configured with the [HALF_ERRHANDLIG_...](\ref HALF_ERRHANDLING_ERRNO) preprocessor symbols.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	///
+	/// **See also:** Documentation for [std::feraiseexcept](https://en.cppreference.com/w/cpp/numeric/fenv/feraiseexcept).
+	/// \param excepts OR of exceptions to raise
+	/// \retval 0 all selected exceptions raised successfully
+	inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; }
+
+	/// Save exception flags.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	///
+	/// **See also:** Documentation for [std::fegetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag).
+	/// \param flagp address to store flag state at
+	/// \param excepts OR of flags to save
+	/// \retval 0 for success
+	inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; }
+
+	/// Restore exception flags.
+	/// This only copies the specified exception state (including unset flags) without incurring any additional exception handling.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	///
+	/// **See also:** Documentation for [std::fesetexceptflag](https://en.cppreference.com/w/cpp/numeric/fenv/feexceptflag).
+	/// \param flagp address to take flag state from
+	/// \param excepts OR of flags to restore
+	/// \retval 0 for success
+	inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()|(*flagp&excepts)) & (*flagp|~excepts); return 0; }
+
+	/// Throw C++ exceptions based on set exception flags.
+	/// This function manually throws a corresponding C++ exception if one of the specified flags is set, 
+	/// no matter if automatic throwing (via [HALF_ERRHANDLING_THROW_...](\ref HALF_ERRHANDLING_THROW_INVALID)) is enabled or not.
+	/// This function works even if [automatic exception flag handling](\ref HALF_ERRHANDLING_FLAGS) is disabled, 
+	/// but in that case manual flag management is the only way to raise flags.
+	/// \param excepts OR of exceptions to test
+	/// \param msg error message to use for exception description
+	/// \throw std::domain_error if `FE_INVALID` or `FE_DIVBYZERO` is selected and set
+	/// \throw std::overflow_error if `FE_OVERFLOW` is selected and set
+	/// \throw std::underflow_error if `FE_UNDERFLOW` is selected and set
+	/// \throw std::range_error if `FE_INEXACT` is selected and set
+	inline void fethrowexcept(int excepts, const char *msg = "") {
+		excepts &= detail::errflags();
+#if HALF_ERRHANDLING_THROWS
+	#ifdef HALF_ERRHANDLING_THROW_INVALID
+		if(excepts & FE_INVALID)
+			throw std::domain_error(msg);
+	#endif
+	#ifdef HALF_ERRHANDLING_THROW_DIVBYZERO
+		if(excepts & FE_DIVBYZERO)
+			throw std::domain_error(msg);
+	#endif
+	#ifdef HALF_ERRHANDLING_THROW_OVERFLOW
+		if(excepts & FE_OVERFLOW)
+			throw std::overflow_error(msg);
+	#endif
+	#ifdef HALF_ERRHANDLING_THROW_UNDERFLOW
+		if(excepts & FE_UNDERFLOW)
+			throw std::underflow_error(msg);
+	#endif
+	#ifdef HALF_ERRHANDLING_THROW_INEXACT
+		if(excepts & FE_INEXACT)
+			throw std::range_error(msg);
+	#endif
+#else
+		std::fprintf(stderr, "%s\n", msg);
+		std::terminate();
+#endif
+	}
+	/// \}
+}
+
+
+#undef HALF_UNUSED_NOERR
+#undef constexpr_NOERR
+#undef HALF_TWOS_COMPLEMENT_INT
+#ifdef HALF_POP_WARNINGS
+	#pragma warning(pop)
+	#undef HALF_POP_WARNINGS
+#endif
+

+ 208 - 0
3rd/numpy/include/xtl/xhash.hpp

@@ -0,0 +1,208 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_HASH_HPP
+#define XTL_HASH_HPP
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include <type_traits>
+
+namespace xtl
+{
+
+    std::size_t hash_bytes(const void* buffer, std::size_t length, std::size_t seed);
+
+    uint32_t murmur2_x86(const void* buffer, std::size_t length, uint32_t seed);
+    uint64_t murmur2_x64(const void* buffer, std::size_t length, uint64_t seed);
+
+    /******************************
+     *  hash_bytes implementation *
+     ******************************/
+
+    namespace detail
+    {
+        // Fallback for unusual sizeof(std::size_t) (neither 4 nor 8): a simple
+        // multiplicative (x131) byte-at-a-time hash -- not an actual MurmurHash,
+        // despite sharing the name with the specializations below.
+        template <std::size_t N>
+        std::size_t murmur_hash(const void* buffer, std::size_t length, std::size_t seed)
+        {
+            std::size_t hash = seed;
+            const char* data = static_cast<const char*>(buffer);
+            for (; length != 0; --length)
+            {
+                hash = (hash * 131) + static_cast<std::size_t>(*data++);
+            }
+            return hash;
+        }
+
+        // Murmur hash is an algorithm written by Austin Appleby. See https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
+        // 32-bit MurmurHash2. NOTE(review): reads input 4 bytes at a time via
+        // *(uint32_t*)data, assuming unaligned 32-bit loads are acceptable on the
+        // target (true on x86) -- confirm before use on alignment-strict platforms.
+        inline uint32_t murmur2_x86_impl(const void* buffer, std::size_t length, uint32_t seed)
+        {
+            const uint32_t m = 0x5bd1e995;
+            uint32_t len = static_cast<uint32_t>(length);
+
+            // Initialize the hash to a 'random' value
+            uint32_t h = seed ^ len;
+
+            // Mix 4 bytes at a time into the hash
+            const unsigned char * data = (const unsigned char *)buffer;
+            
+            while(len >= 4)
+            {
+                uint32_t k = *(uint32_t*)data;
+                k *= m;
+                k ^= k >> 24;
+                k *= m;
+                
+                h *= m;
+                h ^= k;
+                
+                data += 4;
+                len -= 4;
+            }
+            
+            // Handle the last few bytes of the input array
+            // (intentional fallthrough: each case folds in one trailing byte)
+            switch(len)
+            {
+            case 3: h ^= static_cast<uint32_t>(data[2] << 16);
+            case 2: h ^= static_cast<uint32_t>(data[1] << 8);
+            case 1: h ^= static_cast<uint32_t>(data[0]);
+                h *= m;
+            };
+
+            // Do a few final mixes of the hash to ensure the last few
+            // bytes are well-incorporated.
+            h ^= h >> 13;
+            h *= m;
+            h ^= h >> 15;
+            
+            return h;
+        }
+
+        // size_t is 32 bits: delegate to the 32-bit MurmurHash2 implementation.
+        template <>
+        inline std::size_t murmur_hash<4>(const void* buffer, std::size_t length, std::size_t seed)
+        {
+            return std::size_t(murmur2_x86_impl(buffer, length, static_cast<uint32_t>(seed)));
+        }
+
+        // Combine the first n bytes of p into a single integer, with p[0] ending up
+        // in the least-significant byte. Precondition: 1 <= n <= sizeof(std::size_t)
+        // (for n == 0 the do-while would read p[-1]; callers only pass length & 0x7 != 0).
+        inline std::size_t load_bytes(const char* p, int n)
+        {
+            std::size_t result = 0;
+            --n;
+            do
+            {
+                result = (result << 8) + static_cast<unsigned char>(p[n]);
+            } while (--n >= 0);
+            return result;
+        }
+
+#if INTPTR_MAX == INT64_MAX
+        // 64-bits hash for 64-bits platform
+        // MurmurHash64A-style: consumes 8 bytes per iteration (memcpy keeps the
+        // loads alignment-safe); the 1-7 trailing bytes are folded in via load_bytes.
+        template <>
+        inline std::size_t murmur_hash<8>(const void* buffer, std::size_t length, std::size_t seed)
+        {
+            constexpr std::size_t m = (static_cast<std::size_t>(0xc6a4a793UL) << 32UL) +
+                static_cast<std::size_t>(0x5bd1e995UL);
+            constexpr int r = 47;
+            const char* data = static_cast<const char*>(buffer);
+            // end = start rounded down to a whole number of 8-byte words
+            const char* end = data + (length & std::size_t(~0x7));
+            std::size_t hash = seed ^ (length * m);
+            while (data != end)
+            {
+                std::size_t k;
+                std::memcpy(&k, data, sizeof(k));
+                k *= m;
+                k ^= k >> r;
+                k *= m;
+                hash ^= k;
+                hash *= m;
+                data += 8;
+            }
+            if ((length & 0x7) != 0)
+            {
+                std::size_t k = load_bytes(end, length & 0x7);
+                hash ^= k;
+                hash *= m;
+            }
+            hash ^= hash >> r;
+            hash *= m;
+            hash ^= hash >> r;
+
+            return hash;
+        }
+#elif INTPTR_MAX == INT32_MAX
+        // 64-bits size_t hash path for 32-bits platforms (MurmurHash2A-style).
+        // mmix performs one mixing step, folding k into the running hash h.
+        inline void mmix(uint32_t& h, uint32_t& k, uint32_t m, int r)
+        {
+            k *= m; k ^= k >> r; k *= m; h *= m; h ^= k;
+        }
+
+        // NOTE(review): computes a 32-bit hash and widens it to the return type, so
+        // only the low 32 bits carry entropy (murmur2_x64 uses this overload on
+        // 32-bit platforms). Same unaligned 4-byte loads as murmur2_x86_impl; the
+        // trailing-byte switch falls through on purpose. `uint32_t l = length` is a
+        // narrowing copy -- presumably harmless here since this branch targets
+        // 32-bit pointers, but confirm for exotic targets.
+        template <>
+        inline std::size_t murmur_hash<8>(const void* buffer, std::size_t length, std::size_t seed)
+        {
+            const uint32_t m = 0x5bd1e995;
+            const int r = 24;
+            uint32_t l = length;
+
+            const auto* data = reinterpret_cast<const unsigned char*>(buffer);
+
+            uint32_t h = seed;
+
+            while (length >= 4)
+            {
+                uint32_t k = *(uint32_t*)data;
+
+                mmix(h, k, m, r);
+
+                data += 4;
+                length -= 4;
+            }
+
+            uint32_t t = 0;
+
+            switch (length)
+            {
+            case 3: t ^= data[2] << 16;
+            case 2: t ^= data[1] << 8;
+            case 1: t ^= data[0];
+            };
+
+            mmix(h, t, m, r);
+            mmix(h, l, m, r);
+
+            h ^= h >> 13;
+            h *= m;
+            h ^= h >> 15;
+
+            return h;
+        }
+#else
+#error Unknown pointer size or missing size macros!
+#endif
+    }
+
+    // Hash `length` bytes with the size_t-sized Murmur variant selected at
+    // compile time from sizeof(std::size_t).
+    inline std::size_t hash_bytes(const void* buffer, std::size_t length, std::size_t seed)
+    {
+        return detail::murmur_hash<sizeof(std::size_t)>(buffer, length, seed);
+    }
+
+    // 32-bit MurmurHash2 (x86 variant).
+    inline uint32_t murmur2_x86(const void* buffer, std::size_t length, uint32_t seed)
+    {
+        return detail::murmur2_x86_impl(buffer, length, seed);
+    }
+
+    // 64-bit Murmur hash. NOTE(review): on 32-bit platforms this forwards to the
+    // 32-bit-based murmur_hash<8> (and narrows the seed), so the value differs
+    // from true MurmurHash64A -- verify callers do not require cross-platform
+    // stable hashes.
+    inline uint64_t murmur2_x64(const void* buffer, std::size_t length, uint64_t seed)
+    {
+        return detail::murmur_hash<8>(buffer, length, seed);
+    }
+}
+
+#endif

+ 73 - 0
3rd/numpy/include/xtl/xhierarchy_generator.hpp

@@ -0,0 +1,73 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_HIERARCHY_GENERATOR_HPP
+#define XTL_HIERARCHY_GENERATOR_HPP
+
+#include "xmeta_utils.hpp"
+
+namespace xtl
+{
+
+    /*********************************
+     * scattered hierarchy generator *
+     *********************************/
+
+    // Generates a "scattered" hierarchy: for a type list TL = mpl::vector<T...>
+    // and a unit template U, the resulting class derives publicly from U<T> for
+    // every T in the list.
+    template <class TL, template <class> class U>
+    class xscatter_hierarchy_generator;
+
+    // Recursive case: inherit U<T> and recurse on the tail of the list.
+    template <template <class> class U, class T, class... Args>
+    class xscatter_hierarchy_generator<mpl::vector<T, Args...>, U>
+        : public U<T>, public xscatter_hierarchy_generator<mpl::vector<Args...>, U>
+    {
+    };
+
+    // Base case: the empty type list terminates the recursion.
+    template <template <class> class U>
+    class xscatter_hierarchy_generator<mpl::vector<>, U>
+    {
+    };
+
+    /******************************
+     * linear hierarchy generator *
+     ******************************/
+
+    // Default terminal base of the linear hierarchy chain.
+    class default_root {};
+
+    // Generates a single-inheritance chain U<T0, U<T1, ... Root>>; each level
+    // perfect-forwards its constructor arguments down the chain to Root.
+    template <class TL, template <class, class> class U, class Root = default_root>
+    class xlinear_hierarchy_generator;
+
+    template <template <class, class> class U, class Root, class T0, class... Args>
+    class xlinear_hierarchy_generator<mpl::vector<T0, Args...>, U, Root>
+        : public U<T0, xlinear_hierarchy_generator<mpl::vector<Args...>, U, Root>>
+    {
+    public:
+
+        using base_type = U<T0, xlinear_hierarchy_generator<mpl::vector<Args...>, U, Root>>;
+        // Forward construction arguments to the next level of the chain.
+        template <class... T>
+        inline xlinear_hierarchy_generator(T&&... args)
+            : base_type(std::forward<T>(args)...)
+        {
+        }
+    };
+
+    // Base case: empty list -- derive from (and construct) Root directly.
+    template <template <class, class> class U, class Root>
+    class xlinear_hierarchy_generator<mpl::vector<>, U, Root>
+        : public Root
+    {
+    public:
+
+        template <class... T>
+        inline xlinear_hierarchy_generator(T&&... args)
+            : Root(std::forward<T>(args)...)
+        {
+        }
+    };
+}
+
+#endif

+ 422 - 0
3rd/numpy/include/xtl/xiterator_base.hpp

@@ -0,0 +1,422 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XITERATOR_BASE_HPP
+#define XTL_XITERATOR_BASE_HPP
+
+#include <cstddef>
+#include <iterator>
+
+namespace xtl
+{
+    /**************************************
+     * class xbidirectional_iterator_base *
+     **************************************/
+
+    // CRTP base for bidirectional iterators: the derived type I must provide
+    // pre-increment, pre-decrement and operator==; this base derives the
+    // post-fix forms and operator!= from them.
+    template <class I, class T, class D = std::ptrdiff_t, class P = T*, class R = T&>
+    class xbidirectional_iterator_base
+    {
+    public:
+
+        using derived_type = I;
+        using value_type = T;
+        using reference = R;
+        using pointer = P;
+        using difference_type = D;
+        using iterator_category = std::bidirectional_iterator_tag;
+
+        inline friend derived_type operator++(derived_type& d, int)
+        {
+            derived_type tmp(d);
+            ++d;
+            return tmp;
+        }
+
+        inline friend derived_type operator--(derived_type& d, int)
+        {
+            derived_type tmp(d);
+            --d;
+            return tmp;
+
+        }
+
+        inline friend bool operator!=(const derived_type& lhs, const derived_type& rhs)
+        {
+            return !(lhs == rhs);
+        }
+   };
+
+    // Convenience aliases: derive the iterator member types from a traits-like
+    // type T (base2) or from an explicit derived type I plus traits T (base3).
+    template <class T>
+    using xbidirectional_iterator_base2 = xbidirectional_iterator_base<typename T::iterator_type,
+                                                                       typename T::value_type,
+                                                                       typename T::difference_type,
+                                                                       typename T::pointer,
+                                                                       typename T::reference>;
+
+    template <class I, class T>
+    using xbidirectional_iterator_base3 = xbidirectional_iterator_base<I,
+                                                                       typename T::value_type,
+                                                                       typename T::difference_type,
+                                                                       typename T::pointer,
+                                                                       typename T::reference>;
+
+    /********************************
+     * xrandom_access_iterator_base *
+     ********************************/
+
+    // CRTP base for random access iterators. Beyond the bidirectional
+    // requirements, the derived type must provide operator+=, operator-= and
+    // operator<; this base derives [], +, -, <=, >= and > from them.
+    template <class I, class T, class D = std::ptrdiff_t, class P = T*, class R = T&>
+    class xrandom_access_iterator_base : public xbidirectional_iterator_base<I, T, D, P, R>
+    {
+    public:
+
+        using derived_type = I;
+        using value_type = T;
+        using reference = R;
+        using pointer = P;
+        using difference_type = D;
+        using iterator_category = std::random_access_iterator_tag;
+
+        inline reference operator[](difference_type n) const
+        {
+            return *(*static_cast<const derived_type*>(this) + n);
+        }
+
+        inline friend derived_type operator+(const derived_type& it, difference_type n)
+        {
+            derived_type tmp(it);
+            return tmp += n;
+        }
+
+        inline friend derived_type operator+(difference_type n, const derived_type& it)
+        {
+            derived_type tmp(it);
+            return tmp += n;
+        }
+
+        inline friend derived_type operator-(const derived_type& it, difference_type n)
+        {
+            derived_type tmp(it);
+            return tmp -= n;
+        }
+
+        inline friend bool operator<=(const derived_type& lhs, const derived_type& rhs)
+        {
+            return !(rhs < lhs);
+        }
+
+        inline friend bool operator>=(const derived_type& lhs, const derived_type& rhs)
+        {
+            return !(lhs < rhs);
+        }
+
+        inline friend bool operator>(const derived_type& lhs, const derived_type& rhs)
+        {
+            return rhs < lhs;
+        }
+ 
+    };
+
+    // Convenience aliases, mirroring the bidirectional ones above.
+    template <class T>
+    using xrandom_access_iterator_base2 = xrandom_access_iterator_base<typename T::iterator_type,
+                                                                       typename T::value_type,
+                                                                       typename T::difference_type,
+                                                                       typename T::pointer,
+                                                                       typename T::reference>;
+
+    template <class I, class T>
+    using xrandom_access_iterator_base3 = xrandom_access_iterator_base<I,
+                                                                       typename T::value_type,
+                                                                       typename T::difference_type,
+                                                                       typename T::pointer,
+                                                                       typename T::reference>;
+
+    /*******************************
+     * xrandom_access_iterator_ext *
+     *******************************/
+
+    // Extension for random access iterators defining operator[] and operator+ overloads
+    // accepting size_t arguments. The derived type I must provide operator+= and
+    // operator-= for these to delegate to.
+    template <class I, class R>
+    class xrandom_access_iterator_ext
+    {
+    public:
+
+        using derived_type = I;
+        using reference = R;
+        using size_type = std::size_t;
+
+        inline reference operator[](size_type n) const
+        {
+            return *(*static_cast<const derived_type*>(this) + n);
+        }
+
+        inline friend derived_type operator+(const derived_type& it, size_type n)
+        {
+            derived_type tmp(it);
+            return tmp += n;
+        }
+
+        inline friend derived_type operator+(size_type n, const derived_type& it)
+        {
+            derived_type tmp(it);
+            return tmp += n;
+        }
+
+        inline friend derived_type operator-(const derived_type& it, size_type n)
+        {
+            derived_type tmp(it);
+            return tmp -= n;
+        }
+    };
+
+    /*****************
+     * xkey_iterator *
+     *****************/
+
+    // Bidirectional iterator over the keys of a map-like container M: wraps
+    // M::const_iterator and dereferences to it->first.
+    template <class M>
+    class xkey_iterator : public xbidirectional_iterator_base<xkey_iterator<M>, const typename M::key_type>
+    {
+    public:
+
+        using self_type = xkey_iterator;
+        using base_type = xbidirectional_iterator_base<self_type, const typename M::key_type>;
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using pointer = typename base_type::pointer;
+        using difference_type = typename base_type::difference_type;
+        using iterator_category = typename base_type::iterator_category;
+        using subiterator = typename M::const_iterator;
+
+        inline xkey_iterator(subiterator it) noexcept
+            : m_it(it)
+        {
+        }
+
+        inline self_type& operator++()
+        {
+            ++m_it;
+            return *this;
+        }
+
+        inline self_type& operator--()
+        {
+            --m_it;
+            return *this;
+        }
+
+        inline reference operator*() const
+        {
+            return m_it->first;
+        }
+
+        inline pointer operator->() const
+        {
+            return&(m_it->first);
+        }
+
+        inline bool operator==(const self_type& rhs) const
+        {
+            return m_it == rhs.m_it;
+        }
+
+    private:
+
+        subiterator m_it;
+    };
+
+    /*******************
+     * xvalue_iterator *
+     *******************/
+
+    namespace detail
+    {
+        // Iterator member-type bundles for xvalue_iterator; the const M
+        // specialization wraps M::const_iterator and yields const references.
+        template <class M>
+        struct xvalue_iterator_types
+        {
+            using subiterator = typename M::iterator;
+            using value_type = typename M::mapped_type;
+            using reference = value_type&;
+            using pointer = value_type*;
+            using difference_type = typename subiterator::difference_type;
+        };
+
+        template <class M>
+        struct xvalue_iterator_types<const M>
+        {
+            using subiterator = typename M::const_iterator;
+            using value_type = typename M::mapped_type;
+            using reference = const value_type&;
+            using pointer = const value_type*;
+            using difference_type = typename subiterator::difference_type;
+        };
+   }
+
+    // Bidirectional iterator over the mapped values of a map-like container M;
+    // dereferences to it->second. Inherits PUBLICLY from the CRTP base (the
+    // original used class-default private inheritance, which -- unlike every
+    // sibling iterator in this header -- hid the base's public typedefs,
+    // notably iterator_category, from std::iterator_traits and external users).
+    template <class M>
+    class xvalue_iterator : public xbidirectional_iterator_base3<xvalue_iterator<M>,
+                                                                 detail::xvalue_iterator_types<M>>
+    {
+    public:
+
+        using self_type = xvalue_iterator<M>;
+        using base_type = xbidirectional_iterator_base3<self_type, detail::xvalue_iterator_types<M>>;
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using pointer = typename base_type::pointer;
+        using difference_type = typename base_type::difference_type;
+        using subiterator = typename detail::xvalue_iterator_types<M>::subiterator;
+
+        inline xvalue_iterator(subiterator it) noexcept
+            : m_it(it)
+        {
+        }
+
+        inline self_type& operator++()
+        {
+            ++m_it;
+            return *this;
+        }
+
+        inline self_type& operator--()
+        {
+            --m_it;
+            return *this;
+        }
+
+        inline reference operator*() const
+        {
+            return m_it->second;
+        }
+
+        inline pointer operator->() const
+        {
+            return&(m_it->second);
+        }
+
+        inline bool operator==(const self_type& rhs) const
+        {
+            return m_it == rhs.m_it;
+        }
+    private:
+
+        subiterator m_it;
+    };
+
+    /**********************
+     * xstepping_iterator *
+     **********************/
+
+    // Random access iterator adaptor that advances an underlying iterator It by
+    // a fixed step per increment. operator- and less_than assume both operands
+    // share the same step (for operator-, the distance must also be an exact
+    // multiple of the step).
+    template <class It>
+    class xstepping_iterator : public xrandom_access_iterator_base3<xstepping_iterator<It>,
+                                                                    std::iterator_traits<It>>
+    {
+    public:
+
+        using self_type = xstepping_iterator;
+        using base_type = xrandom_access_iterator_base3<self_type, std::iterator_traits<It>>;
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using pointer = typename base_type::pointer;
+        using difference_type = typename base_type::difference_type;
+        using iterator_category = typename base_type::iterator_category;
+        using subiterator = It;
+
+        xstepping_iterator() = default;
+
+        inline xstepping_iterator(subiterator it, difference_type step) noexcept
+            : m_it(it), m_step(step)
+        {
+        }
+
+        inline self_type& operator++()
+        {
+            std::advance(m_it, m_step);
+            return *this;
+        }
+
+        inline self_type& operator--()
+        {
+            std::advance(m_it, -m_step);
+            return *this;
+        }
+
+        inline self_type& operator+=(difference_type n)
+        {
+            std::advance(m_it, n*m_step);
+            return *this;
+        }
+
+        inline self_type& operator-=(difference_type n)
+        {
+            std::advance(m_it, -n*m_step);
+            return *this;
+        }
+
+        inline difference_type operator-(const self_type& rhs) const
+        {
+            return std::distance(rhs.m_it, m_it) / m_step;
+        }
+
+        inline reference operator*() const
+        {
+            return *m_it;
+        }
+
+        inline pointer operator->() const
+        {
+            return m_it;
+        }
+
+        // Equality requires identical position AND step.
+        inline bool equal(const self_type& rhs) const
+        {
+            return m_it == rhs.m_it && m_step == rhs.m_step;
+        }
+
+        // Ordering is only meaningful between iterators with the same step;
+        // mismatched steps compare false.
+        inline bool less_than(const self_type& rhs) const
+        {
+            return m_it < rhs.m_it && m_step == rhs.m_step;
+        }
+
+    private:
+
+        subiterator m_it;
+        difference_type m_step;
+    };
+
+    // Free comparison operators delegating to the members above.
+    template <class It>
+    inline bool operator==(const xstepping_iterator<It>& lhs, const xstepping_iterator<It>& rhs)
+    {
+        return lhs.equal(rhs);
+    }
+
+    template <class It>
+    inline bool operator<(const xstepping_iterator<It>& lhs, const xstepping_iterator<It>& rhs)
+    {
+        return lhs.less_than(rhs);
+    }
+
+    // Factory deducing It; step is expressed in the underlying iterator's
+    // difference_type.
+    template <class It>
+    inline xstepping_iterator<It> make_stepping_iterator(It it, typename std::iterator_traits<It>::difference_type step)
+    {
+        return xstepping_iterator<It>(it, step);
+    }
+
+    /***********************
+     * common_iterator_tag *
+     ***********************/
+
+    // The most refined iterator category shared by all Its, computed as the
+    // std::common_type of their iterator_category tags.
+    template <class... Its>
+    struct common_iterator_tag : std::common_type<typename std::iterator_traits<Its>::iterator_category...>
+    {
+    };
+
+    template <class... Its>
+    using common_iterator_tag_t = typename common_iterator_tag<Its...>::type;
+}
+
+#endif

+ 546 - 0
3rd/numpy/include/xtl/xmasked_value.hpp

@@ -0,0 +1,546 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
+* Martin Renou                                                             *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XMASKED_VALUE_HPP
+#define XTL_XMASKED_VALUE_HPP
+
+#include "xmasked_value_meta.hpp"
+#include "xtype_traits.hpp"
+
+namespace xtl
+{
+    // Create a hidden ("masked") value of type T: the payload is constructed
+    // from T(0) and the visibility flag is false.
+    template <class T>
+    inline xmasked_value<T, bool> masked() noexcept
+    {
+        return xmasked_value<T, bool>(T(0), false);
+    }
+
+    /****************************
+    * xmasked_value declaration *
+    *****************************/
+
+    // A value of type T paired with a visibility flag of type B: when the flag
+    // is false the value is considered "masked" (missing). Compound assignment
+    // only touches the payload while visible.
+    template <class T, class B>
+    class xmasked_value
+    {
+    public:
+
+        using self_type = xmasked_value<T, B>;
+
+        using value_type = T;
+        using flag_type = B;
+
+        template <class T1, class B1>
+        constexpr xmasked_value(T1&& value, B1&& flag);
+
+        template <class T1>
+        constexpr xmasked_value(T1&& value);
+
+        explicit constexpr xmasked_value();
+
+        // NOTE(review): implicit, non-const conversion to value_type that
+        // ignores the mask -- confirm callers do not rely on masked values
+        // converting silently.
+        inline operator value_type() {
+            return m_value;
+        }
+
+        std::add_lvalue_reference_t<T> value() & noexcept;
+        std::add_lvalue_reference_t<std::add_const_t<T>> value() const & noexcept;
+        std::conditional_t<std::is_reference<T>::value, apply_cv_t<T, std::decay_t<T>>&, std::decay_t<T>> value() && noexcept;
+        std::conditional_t<std::is_reference<T>::value, const std::decay_t<T>&, std::decay_t<T>> value() const && noexcept;
+
+        std::add_lvalue_reference_t<B> visible() & noexcept;
+        std::add_lvalue_reference_t<std::add_const_t<B>> visible() const & noexcept;
+        std::conditional_t<std::is_reference<B>::value, apply_cv_t<B, std::decay_t<B>>&, std::decay_t<B>> visible() && noexcept;
+        std::conditional_t<std::is_reference<B>::value, const std::decay_t<B>&, std::decay_t<B>> visible() const && noexcept;
+
+        template <class T1, class B1>
+        bool equal(const xmasked_value<T1, B1>& rhs) const noexcept;
+
+        template <class T1, XTL_DISALLOW(is_xmasked_value<T1>)>
+        bool equal(const T1& rhs) const noexcept;
+
+        template <class T1, class B1>
+        void swap(xmasked_value<T1, B1>& other);
+
+        // Compound assignment: a plain rhs is applied only while visible; a
+        // masked rhs additionally masks this value if the rhs is masked.
+#define DEFINE_ASSIGN_OPERATOR(OP)                                                            \
+        template <class T1>                                                                   \
+        inline xmasked_value& operator OP(const T1& rhs)                                      \
+        {                                                                                     \
+            if (m_visible)                                                                    \
+            {                                                                                 \
+                m_value OP rhs;                                                               \
+            }                                                                                 \
+            return *this;                                                                     \
+        }                                                                                     \
+                                                                                              \
+        template <class T1, class B1>                                                         \
+        inline xmasked_value& operator OP(const xmasked_value<T1, B1>& rhs)                   \
+        {                                                                                     \
+            m_visible = m_visible && rhs.visible();                                           \
+            if (m_visible)                                                                    \
+            {                                                                                 \
+                m_value OP rhs.value();                                                       \
+            }                                                                                 \
+            return *this;                                                                     \
+        }
+
+        DEFINE_ASSIGN_OPERATOR(=);
+        DEFINE_ASSIGN_OPERATOR(+=);
+        DEFINE_ASSIGN_OPERATOR(-=);
+        DEFINE_ASSIGN_OPERATOR(*=);
+        DEFINE_ASSIGN_OPERATOR(/=);
+        DEFINE_ASSIGN_OPERATOR(%=);
+        DEFINE_ASSIGN_OPERATOR(&=);
+        DEFINE_ASSIGN_OPERATOR(|=);
+        DEFINE_ASSIGN_OPERATOR(^=);
+#undef DEFINE_ASSIGN_OPERATOR
+
+    private:
+
+        value_type m_value;
+        flag_type m_visible;
+    };
+
+    /********************************
+     * xmasked_value implementation *
+     ********************************/
+
+    // Construct from a payload and an explicit visibility flag.
+    template <class T, class B>
+    template <class T1, class B1>
+    inline constexpr xmasked_value<T, B>::xmasked_value(T1&& value, B1&& flag)
+        : m_value(std::forward<T1>(value)), m_visible(std::forward<B1>(flag))
+    {
+    }
+
+    // Construct a visible value.
+    template <class T, class B>
+    template <class T1>
+    inline constexpr xmasked_value<T, B>::xmasked_value(T1&& value)
+        : m_value(std::forward<T1>(value)), m_visible(true)
+    {
+    }
+
+    // Default: visible, payload constructed from 0.
+    template <class T, class B>
+    inline constexpr xmasked_value<T, B>::xmasked_value()
+        : m_value(0), m_visible(true)
+    {
+    }
+
+    // Factory helpers. NOTE(review): with a forwarding reference, T/B may deduce
+    // to lvalue-reference types, producing an xmasked_value that aliases the
+    // arguments -- confirm the intended lifetime at call sites.
+    template <class T>
+    inline auto masked_value(T&& val)
+    {
+        return xmasked_value<T>(std::forward<T>(val));
+    }
+
+    template <class T, class B>
+    inline auto masked_value(T&& val, B&& mask)
+    {
+        return xmasked_value<T, B>(std::forward<T>(val), std::forward<B>(mask));
+    }
+
+    // Ref-qualified accessors: lvalue overloads return references to the stored
+    // members; rvalue overloads return by value (or by reference when T/B is
+    // itself a reference type).
+    template <class T, class B>
+    inline auto xmasked_value<T, B>::value() & noexcept -> std::add_lvalue_reference_t<T>
+    {
+        return m_value;
+    }
+
+    template <class T, class B>
+    inline auto xmasked_value<T, B>::value() const & noexcept -> std::add_lvalue_reference_t<std::add_const_t<T>>
+    {
+        return m_value;
+    }
+
+    template <class T, class B>
+    inline auto xmasked_value<T, B>::value() && noexcept -> std::conditional_t<std::is_reference<T>::value, apply_cv_t<T, std::decay_t<T>>&, std::decay_t<T>>
+    {
+        return m_value;
+    }
+
+    template <class T, class B>
+    inline auto xmasked_value<T, B>::value() const && noexcept -> std::conditional_t<std::is_reference<T>::value, const std::decay_t<T>&, std::decay_t<T>>
+    {
+        return m_value;
+    }
+
+    template <class T, class B>
+    inline auto xmasked_value<T, B>::visible() & noexcept -> std::add_lvalue_reference_t<B>
+    {
+        return m_visible;
+    }
+
+    template <class T, class B>
+    inline auto xmasked_value<T, B>::visible() const & noexcept -> std::add_lvalue_reference_t<std::add_const_t<B>>
+    {
+        return m_visible;
+    }
+
+    template <class T, class B>
+    inline auto xmasked_value<T, B>::visible() && noexcept -> std::conditional_t<std::is_reference<B>::value, apply_cv_t<B, std::decay_t<B>>&, std::decay_t<B>>
+    {
+        return m_visible;
+    }
+
+    template <class T, class B>
+    inline auto xmasked_value<T, B>::visible() const && noexcept -> std::conditional_t<std::is_reference<B>::value, const std::decay_t<B>&, std::decay_t<B>>
+    {
+        return m_visible;
+    }
+
+    // Two masked values are equal when both are hidden, or both visible with
+    // equal payloads.
+    template <class T, class B>
+    template <class T1, class B1>
+    inline bool xmasked_value<T, B>::equal(const xmasked_value<T1, B1>& rhs) const noexcept
+    {
+        return (!m_visible && !rhs.visible()) || (m_value == rhs.value() && (m_visible && rhs.visible()));
+    }
+
+    // A plain value equals this only if this value is visible.
+    template <class T, class B>
+    template <class T1, check_disallow<is_xmasked_value<T1>>>
+    inline bool xmasked_value<T, B>::equal(const T1& rhs) const noexcept
+    {
+        return m_visible && m_value == rhs;
+    }
+
+    // Member-wise swap (uses ADL-enabled std::swap).
+    template <class T, class B>
+    template <class T1, class B1>
+    inline void xmasked_value<T, B>::swap(xmasked_value<T1, B1>& other)
+    {
+        using std::swap;
+        swap(m_value, other.m_value);
+        swap(m_visible, other.m_visible);
+    }
+
+    // Symmetric ==/!= overloads delegating to equal(); the XTL_REQUIRES
+    // constraints disambiguate plain-value vs masked-value operands.
+    template <class T1, class B1, class T2, class B2>
+    inline bool operator==(const xmasked_value<T1, B1>& lhs, const xmasked_value<T2, B2>& rhs) noexcept
+    {
+        return lhs.equal(rhs);
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(negation<is_xmasked_value<T1>>)>
+    inline bool operator==(const T1& lhs, const xmasked_value<T2, B2>& rhs) noexcept
+    {
+        return rhs.equal(lhs);
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(negation<is_xmasked_value<T2>>)>
+    inline bool operator==(const xmasked_value<T1, B1>& lhs, const T2& rhs) noexcept
+    {
+        return lhs.equal(rhs);
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline bool operator!=(const xmasked_value<T1, B1>& lhs, const xmasked_value<T2, B2>& rhs) noexcept
+    {
+        return !lhs.equal(rhs);
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(negation<is_xmasked_value<T1>>)>
+    inline bool operator!=(const T1& lhs, const xmasked_value<T2, B2>& rhs) noexcept
+    {
+        return !rhs.equal(lhs);
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(negation<is_xmasked_value<T2>>)>
+    inline bool operator!=(const xmasked_value<T1, B1>& lhs, const T2& rhs) noexcept
+    {
+        return !lhs.equal(rhs);
+    }
+
+    // Unary plus: returns a decayed copy of the operand (value and flag).
+    template <class T, class B>
+    inline auto operator+(const xmasked_value<T, B>& e) noexcept
+        -> xmasked_value<std::decay_t<T>, std::decay_t<B>>
+    {
+        return xmasked_value<std::decay_t<T>, std::decay_t<B>>(e.value(), e.visible());
+    }
+
+    // Unary minus: negates the value, preserving the visibility flag.
+    template <class T, class B>
+    inline auto operator-(const xmasked_value<T, B>& e) noexcept
+        -> xmasked_value<std::decay_t<T>, std::decay_t<B>>
+    {
+        return xmasked_value<std::decay_t<T>, std::decay_t<B>>(-e.value(), e.visible());
+    }
+
+    // Bitwise not: applied only when the operand is visible; a masked
+    // result is produced otherwise.
+    template <class T, class B>
+    inline auto operator~(const xmasked_value<T, B>& e) noexcept
+        -> xmasked_value<std::decay_t<T>>
+    {
+        using value_type = std::decay_t<T>;
+        return e.visible() ? masked_value(~e.value()) : masked<value_type>();
+    }
+
+    // Logical not: the result value type is deduced from !e.value().
+    template <class T, class B>
+    inline auto operator!(const xmasked_value<T, B>& e) noexcept -> xmasked_value<decltype(!e.value())>
+    {
+        using return_type = xmasked_value<decltype(!e.value())>;
+        using value_type = typename return_type::value_type;
+        return e.visible() ? return_type(!e.value()) : masked<value_type>();
+    }
+
+    // Stream insertion: prints the wrapped value, or the literal string
+    // "masked" when the value is not visible.
+    template <class T, class B, class OC, class OT>
+    inline std::basic_ostream<OC, OT>& operator<<(std::basic_ostream<OC, OT>& out, xmasked_value<T, B> v)
+    {
+        if (v.visible())
+        {
+            out << v.value();
+        }
+        else
+        {
+            out << "masked";
+        }
+        return out;
+    }
+
+    // Free swap, forwarding to the member swap.
+    template <class T1, class B1, class T2, class B2>
+    inline void swap(xmasked_value<T1, B1>& lhs, xmasked_value<T2, B2>& rhs)
+    {
+        lhs.swap(rhs);
+    }
+
+// Generates the masked/masked, masked/plain and plain/masked overloads of
+// an arithmetic or bitwise binary operator. The result is masked unless
+// every xmasked_value operand is visible; its value type is the promoted
+// type of both operand value types.
+#define DEFINE_OPERATOR(OP)                                                                                   \
+    template <class T1, class B1, class T2, class B2>                                                         \
+    inline auto operator OP(const xmasked_value<T1, B1>& e1, const xmasked_value<T2, B2>& e2) noexcept        \
+        -> xmasked_value<promote_type_t<std::decay_t<T1>, std::decay_t<T2>>>                                  \
+    {                                                                                                         \
+        using value_type = promote_type_t<std::decay_t<T1>, std::decay_t<T2>>;                                \
+        return e1.visible() && e2.visible() ? masked_value(e1.value() OP e2.value()) : masked<value_type>();  \
+    }                                                                                                         \
+                                                                                                              \
+    template <class T1, class B1, class T2, XTL_REQUIRES(negation<is_xmasked_value<T2>>)>                     \
+    inline auto operator OP(const xmasked_value<T1, B1>& e1, const T2& e2) noexcept                           \
+        -> xmasked_value<promote_type_t<std::decay_t<T1>, std::decay_t<T2>>>                                  \
+    {                                                                                                         \
+        using value_type = promote_type_t<std::decay_t<T1>, std::decay_t<T2>>;                                \
+        return e1.visible() ? masked_value(e1.value() OP e2) : masked<value_type>();                          \
+    }                                                                                                         \
+                                                                                                              \
+    template <class T1, class T2, class B2, XTL_REQUIRES(negation<is_xmasked_value<T1>>)>                     \
+    inline auto operator OP(const T1& e1, const xmasked_value<T2, B2>& e2) noexcept                           \
+        -> xmasked_value<promote_type_t<std::decay_t<T1>, std::decay_t<T2>>>                                  \
+    {                                                                                                         \
+        using value_type = promote_type_t<std::decay_t<T1>, std::decay_t<T2>>;                                \
+        return e2.visible() ? masked_value(e1 OP e2.value()) : masked<value_type>();                          \
+    }
+
+// Same overload set as DEFINE_OPERATOR, but for logical/comparison
+// operators: the result value type is deduced from the operator expression
+// itself instead of being computed with promote_type_t.
+#define DEFINE_BOOL_OPERATOR(OP)                                                                           \
+    template <class T1, class B1, class T2, class B2>                                                      \
+    inline auto operator OP(const xmasked_value<T1, B1>& e1, const xmasked_value<T2, B2>& e2) noexcept     \
+        -> xmasked_value<decltype(e1.value() OP e2.value())>                                               \
+    {                                                                                                      \
+        return e1.visible() && e2.visible() ?                                                              \
+            masked_value(e1.value() OP e2.value()) :                                                       \
+            masked<decltype(e1.value() OP e2.value())>();                                                  \
+    }                                                                                                      \
+                                                                                                           \
+    template <class T1, class B1, class T2, XTL_REQUIRES(negation<is_xmasked_value<T2>>)>                  \
+    inline auto operator OP(const xmasked_value<T1, B1>& e1, const T2& e2) noexcept                        \
+        -> xmasked_value<decltype(e1.value() OP e2)>                                                       \
+    {                                                                                                      \
+        return e1.visible() ? masked_value(e1.value() OP e2) : masked<decltype(e1.value() OP e2)>();       \
+    }                                                                                                      \
+                                                                                                           \
+    template <class T1, class T2, class B2, XTL_REQUIRES(negation<is_xmasked_value<T1>>)>                  \
+    inline auto operator OP(const T1& e1, const xmasked_value<T2, B2>& e2) noexcept                        \
+        -> xmasked_value<decltype(e1 OP e2.value())>                                                       \
+    {                                                                                                      \
+        return e2.visible() ? masked_value(e1 OP e2.value()) : masked<decltype(e1 OP e2.value())>();       \
+    }
+
+// Generates an overload of a unary <cmath> function (abs, exp, sin, ...)
+// for xmasked_value: the function is applied only when the operand is
+// visible, and the result value type is the decayed operand value type.
+// `using std::OP;` enables ADL for user-defined value types.
+#define DEFINE_UNARY_OPERATOR(OP)                                                     \
+    template <class T, class B>                                                       \
+    inline xmasked_value<std::decay_t<T>> OP(const xmasked_value<T, B>& e)            \
+    {                                                                                 \
+        using std::OP;                                                                \
+        return e.visible() ? masked_value(OP(e.value())) : masked<std::decay_t<T>>(); \
+    }
+
+// Variant for classification functions (isfinite, isinf, isnan) whose
+// result type differs from the operand type; it is deduced from the call.
+#define DEFINE_UNARY_BOOL_OPERATOR(OP)                                                        \
+    template <class T, class B>                                                               \
+    inline auto OP(const xmasked_value<T, B>& e)                                              \
+    {                                                                                         \
+        using std::OP;                                                                        \
+        return e.visible() ? masked_value(OP(e.value())) : masked<decltype(OP(e.value()))>(); \
+    }
+
+// Generates the three overloads (masked/masked, masked/plain, plain/masked)
+// of a binary <cmath> function (fmod, pow, hypot, ...): the call is made
+// only when every masked operand is visible.
+#define DEFINE_BINARY_OPERATOR(OP)                                                                       \
+    template <class T1, class B1, class T2, class B2>                                                    \
+    inline auto OP(const xmasked_value<T1, B1>& e1, const xmasked_value<T2, B2>& e2)                     \
+    {                                                                                                    \
+        using std::OP;                                                                                   \
+        return e1.visible() && e2.visible() ?                                                            \
+            masked_value(OP(e1.value(), e2.value())) :                                                   \
+            masked<decltype(OP(e1.value(), e2.value()))>();                                              \
+    }                                                                                                    \
+                                                                                                         \
+    template <class T1, class B1, class T2>                                                              \
+    inline auto OP(const xmasked_value<T1, B1>& e1, const T2& e2)                                        \
+    {                                                                                                    \
+        using std::OP;                                                                                   \
+        return e1.visible() ? masked_value(OP(e1.value(), e2)) : masked<decltype(OP(e1.value(), e2))>(); \
+    }                                                                                                    \
+                                                                                                         \
+    template <class T1, class T2, class B2>                                                              \
+    inline auto OP(const T1& e1, const xmasked_value<T2, B2>& e2)                                        \
+    {                                                                                                    \
+        using std::OP;                                                                                   \
+        return e2.visible() ? masked_value(OP(e1, e2.value())) : masked<decltype(OP(e1, e2.value()))>(); \
+    }
+
+// Ternary <cmath> function support (used for fma). The suffix encodes the
+// argument kinds: M = xmasked_value operand, T = plain operand. Each macro
+// generates one overload; the call is made only when every masked operand
+// is visible.
+#define DEFINE_TERNARY_OPERATOR_MMM(OP)                                                                               \
+    template <class T1, class B1, class T2, class B2, class T3, class B3>                                             \
+    inline auto OP(const xmasked_value<T1, B1>& e1, const xmasked_value<T2, B2>& e2, const xmasked_value<T3, B3>& e3) \
+    {                                                                                                                 \
+        using std::OP;                                                                                                \
+        return (e1.visible() && e2.visible() && e3.visible()) ?                                                       \
+                masked_value(OP(e1.value(), e2.value(), e3.value())) :                                                \
+                masked<decltype(OP(e1.value(), e2.value(), e3.value()))>();                                           \
+    }
+
+#define DEFINE_TERNARY_OPERATOR_MMT(OP)                                                              \
+    template <class T1, class B1, class T2, class B2, class T3>                                      \
+    inline auto OP(const xmasked_value<T1, B1>& e1, const xmasked_value<T2, B2>& e2, const T3& e3)   \
+    {                                                                                                \
+        using std::OP;                                                                               \
+        return (e1.visible() && e2.visible()) ?                                                      \
+                masked_value(OP(e1.value(), e2.value(), e3)) :                                       \
+                masked<decltype(OP(e1.value(), e2.value(), e3))>();                                  \
+    }
+
+#define DEFINE_TERNARY_OPERATOR_MTM(OP)                                                            \
+    template <class T1, class B1, class T2, class T3, class B3>                                    \
+    inline auto OP(const xmasked_value<T1, B1>& e1, const T2& e2, const xmasked_value<T3, B3>& e3) \
+    {                                                                                              \
+        using std::OP;                                                                             \
+        return (e1.visible() && e3.visible()) ?                                                    \
+                masked_value(OP(e1.value(), e2, e3.value())) :                                     \
+                masked<decltype(OP(e1.value(), e2, e3.value()))>();                                \
+    }
+
+#define DEFINE_TERNARY_OPERATOR_TMM(OP)                                                            \
+    template <class T1, class T2, class B2, class T3, class B3>                                    \
+    inline auto OP(const T1& e1, const xmasked_value<T2, B2>& e2, const xmasked_value<T3, B3>& e3) \
+    {                                                                                              \
+        using std::OP;                                                                             \
+        return (e2.visible() && e3.visible()) ?                                                    \
+                masked_value(OP(e1, e2.value(), e3.value())) :                                     \
+                masked<decltype(OP(e1, e2.value(), e3.value()))>();                                \
+    }
+
+#define DEFINE_TERNARY_OPERATOR_TTM(OP)                                         \
+    template <class T1, class T2, class T3, class B3>                           \
+    inline auto OP(const T1& e1, const T2& e2, const xmasked_value<T3, B3>& e3) \
+    {                                                                           \
+        using std::OP;                                                          \
+        return e3.visible() ?                                                   \
+            masked_value(OP(e1, e2, e3.value())) :                              \
+            masked<decltype(OP(e1, e2, e3.value()))>();                         \
+    }
+
+#define DEFINE_TERNARY_OPERATOR_TMT(OP)                                         \
+    template <class T1, class T2, class B2, class T3>                           \
+    inline auto OP(const T1& e1, const xmasked_value<T2, B2>& e2, const T3& e3) \
+    {                                                                           \
+        using std::OP;                                                          \
+        return e2.visible() ?                                                   \
+            masked_value(OP(e1, e2.value(), e3)) :                              \
+            masked<decltype(OP(e1, e2.value(), e3))>();                         \
+    }
+
+#define DEFINE_TERNARY_OPERATOR_MTT(OP)                                         \
+    template <class T1, class B1, class T2, class T3>                           \
+    inline auto OP(const xmasked_value<T1, B1>& e1, const T2& e2, const T3& e3) \
+    {                                                                           \
+        using std::OP;                                                          \
+        return e1.visible() ?                                                   \
+            masked_value(OP(e1.value(), e2, e3)) :                              \
+            masked<decltype(OP(e1.value(), e2, e3))>();                         \
+    }
+
+// Expands all seven mixed combinations; the all-plain (TTT) case is the
+// ordinary std function and needs no overload here.
+#define DEFINE_TERNARY_OPERATOR(OP) \
+    DEFINE_TERNARY_OPERATOR_MMM(OP) \
+                                    \
+    DEFINE_TERNARY_OPERATOR_MMT(OP) \
+    DEFINE_TERNARY_OPERATOR_MTM(OP) \
+    DEFINE_TERNARY_OPERATOR_TMM(OP) \
+    DEFINE_TERNARY_OPERATOR_TTM(OP) \
+    DEFINE_TERNARY_OPERATOR_TMT(OP) \
+    DEFINE_TERNARY_OPERATOR_MTT(OP)
+
+    // Instantiate the overload sets for xmasked_value: arithmetic and
+    // bitwise operators, logical/comparison operators, and the <cmath>
+    // function family (unary, classification, binary, and fma).
+    DEFINE_OPERATOR(+);
+    DEFINE_OPERATOR(-);
+    DEFINE_OPERATOR(*);
+    DEFINE_OPERATOR(/);
+    DEFINE_OPERATOR(%);
+    DEFINE_BOOL_OPERATOR(||);
+    DEFINE_BOOL_OPERATOR(&&);
+    DEFINE_OPERATOR(&);
+    DEFINE_OPERATOR(|);
+    DEFINE_OPERATOR(^);
+    DEFINE_BOOL_OPERATOR(<);
+    DEFINE_BOOL_OPERATOR(<=);
+    DEFINE_BOOL_OPERATOR(>);
+    DEFINE_BOOL_OPERATOR(>=);
+    DEFINE_UNARY_OPERATOR(abs)
+    DEFINE_UNARY_OPERATOR(fabs)
+    DEFINE_UNARY_OPERATOR(exp)
+    DEFINE_UNARY_OPERATOR(exp2)
+    DEFINE_UNARY_OPERATOR(expm1)
+    DEFINE_UNARY_OPERATOR(log)
+    DEFINE_UNARY_OPERATOR(log10)
+    DEFINE_UNARY_OPERATOR(log2)
+    DEFINE_UNARY_OPERATOR(log1p)
+    DEFINE_UNARY_OPERATOR(sqrt)
+    DEFINE_UNARY_OPERATOR(cbrt)
+    DEFINE_UNARY_OPERATOR(sin)
+    DEFINE_UNARY_OPERATOR(cos)
+    DEFINE_UNARY_OPERATOR(tan)
+    DEFINE_UNARY_OPERATOR(acos)
+    DEFINE_UNARY_OPERATOR(asin)
+    DEFINE_UNARY_OPERATOR(atan)
+    DEFINE_UNARY_OPERATOR(sinh)
+    DEFINE_UNARY_OPERATOR(cosh)
+    DEFINE_UNARY_OPERATOR(tanh)
+    DEFINE_UNARY_OPERATOR(acosh)
+    DEFINE_UNARY_OPERATOR(asinh)
+    DEFINE_UNARY_OPERATOR(atanh)
+    DEFINE_UNARY_OPERATOR(erf)
+    DEFINE_UNARY_OPERATOR(erfc)
+    DEFINE_UNARY_OPERATOR(tgamma)
+    DEFINE_UNARY_OPERATOR(lgamma)
+    DEFINE_UNARY_OPERATOR(ceil)
+    DEFINE_UNARY_OPERATOR(floor)
+    DEFINE_UNARY_OPERATOR(trunc)
+    DEFINE_UNARY_OPERATOR(round)
+    DEFINE_UNARY_OPERATOR(nearbyint)
+    DEFINE_UNARY_OPERATOR(rint)
+    DEFINE_UNARY_BOOL_OPERATOR(isfinite)
+    DEFINE_UNARY_BOOL_OPERATOR(isinf)
+    DEFINE_UNARY_BOOL_OPERATOR(isnan)
+    DEFINE_BINARY_OPERATOR(fmod)
+    DEFINE_BINARY_OPERATOR(remainder)
+    DEFINE_BINARY_OPERATOR(fmax)
+    DEFINE_BINARY_OPERATOR(fmin)
+    DEFINE_BINARY_OPERATOR(fdim)
+    DEFINE_BINARY_OPERATOR(pow)
+    DEFINE_BINARY_OPERATOR(hypot)
+    DEFINE_BINARY_OPERATOR(atan2)
+    DEFINE_TERNARY_OPERATOR(fma)
+
+// The generator macros are implementation details of this header; remove
+// them from the preprocessor environment.
+#undef DEFINE_TERNARY_OPERATOR
+#undef DEFINE_TERNARY_OPERATOR_MMM
+#undef DEFINE_TERNARY_OPERATOR_MMT
+#undef DEFINE_TERNARY_OPERATOR_MTM
+#undef DEFINE_TERNARY_OPERATOR_TMM
+#undef DEFINE_TERNARY_OPERATOR_TTM
+#undef DEFINE_TERNARY_OPERATOR_TMT
+#undef DEFINE_TERNARY_OPERATOR_MTT
+#undef DEFINE_BINARY_OPERATOR
+#undef DEFINE_UNARY_OPERATOR
+#undef DEFINE_UNARY_BOOL_OPERATOR
+#undef DEFINE_OPERATOR
+#undef DEFINE_BOOL_OPERATOR
+#undef DEFINE_BOOL_OPERATOR
+}
+
+#endif

+ 41 - 0
3rd/numpy/include/xtl/xmasked_value_meta.hpp

@@ -0,0 +1,41 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
+* Martin Renou                                                             *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XMASKED_VALUE_META_HPP
+#define XTL_XMASKED_VALUE_META_HPP
+
+#include <type_traits>
+
+namespace xtl
+{
+    // Forward declaration of xmasked_value; the visibility-flag type B
+    // defaults to bool.
+    template <class T, class B = bool>
+    class xmasked_value;
+
+    namespace detail
+    {
+        template <class E>
+        struct is_xmasked_value_impl : std::false_type
+        {
+        };
+
+        template <class T, class B>
+        struct is_xmasked_value_impl<xmasked_value<T, B>> : std::true_type
+        {
+        };
+    }
+
+    // Trait detecting whether E is a specialization of xmasked_value.
+    template <class E>
+    using is_xmasked_value = detail::is_xmasked_value_impl<E>;
+
+    // SFINAE helper: resolves to R only when E is NOT an xmasked_value.
+    template <class E, class R>
+    using disable_xmasked_value = std::enable_if_t<!is_xmasked_value<E>::value, R>;
+}
+
+#endif

+ 640 - 0
3rd/numpy/include/xtl/xmeta_utils.hpp

@@ -0,0 +1,640 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XMETA_UTILS_HPP
+#define XTL_XMETA_UTILS_HPP
+
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "xfunctional.hpp"
+#include "xtl_config.hpp"
+
+namespace xtl
+{
+    // TODO move to a xutils if we have one
+
+    // gcc 4.9 is affected by C++14 defect CGW 1558
+    // see http://open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#1558
+    // make_void/void_t: C++14-compatible equivalent of C++17's std::void_t,
+    // implemented through a class template to sidestep the defect above.
+    template <class... T>
+    struct make_void
+    {
+        using type = void;
+    };
+
+    template <class... T>
+    using void_t = typename make_void<T...>::type;
+
+    namespace mpl
+    {
+        /*************
+         * mpl types *
+         *************/
+
+        template <class... T>
+        struct vector
+        {
+        };
+
+        template <bool B>
+        using bool_ = std::integral_constant<bool, B>;
+
+        template <std::size_t S>
+        using size_t_ = std::integral_constant<std::size_t, S>;
+
+        /*******
+         * if_ *
+         *******/
+
+        template <bool B, class T, class F>
+        struct if_c : std::conditional<B, T, F>
+        {
+        };
+
+        template <bool B, class T, class F>
+        using if_c_t = typename if_c<B, T, F>::type;
+
+        template <class B, class T, class F>
+        struct if_ : if_c<B::value, T, F>
+        {
+        };
+
+        template <class B, class T, class F>
+        using if_t = typename if_<B, T, F>::type;
+
+        /***********
+         * eval_if *
+         ***********/
+
+        template <bool B, class T, class F>
+        struct eval_if_c
+        {
+            using type = typename T::type;
+        };
+
+        template <class T, class F>
+        struct eval_if_c<false, T, F>
+        {
+            using type = typename F::type;
+        };
+
+        template <class B, class T, class F>
+        struct eval_if : eval_if_c<B::value, T, F>
+        {
+        };
+
+        template <class B, class T, class F>
+        using eval_if_t = typename eval_if<B, T, F>::type;
+
+        /********
+         * cast *
+         ********/
+
+        namespace detail
+        {
+            template <class A, template <class...> class B>
+            struct cast_impl;
+
+            template <template <class...> class A, class... T, template <class...> class B>
+            struct cast_impl<A<T...>, B>
+            {
+                using type = B<T...>;
+            };
+        }
+
+        template <class A, template <class...> class B>
+        struct cast : detail::cast_impl<A, B>
+        {
+        };
+
+        template <class A, template <class...> class B>
+        using cast_t = typename cast<A, B>::type;
+
+        /********
+         * size *
+         ********/
+
+        namespace detail
+        {
+            template <class L>
+            struct size_impl;
+
+            template <template <class...> class F, class... T>
+            struct size_impl<F<T...>> : size_t_<sizeof...(T)>
+            {
+            };
+        }
+
+        template <class L>
+        struct size : detail::size_impl<L>
+        {
+        };
+
+        /*********
+         * empty *
+         *********/
+
+        namespace detail
+        {
+            template <class L>
+            struct empty_impl;
+
+            template <template <class...> class F, class... T>
+            struct empty_impl<F<T...>> : bool_<sizeof...(T) == std::size_t(0)>
+            {
+            };
+        }
+
+        template <class L>
+        struct empty : detail::empty_impl<L>
+        {
+        };
+
+        template <class L>
+        using empty_t = typename empty<L>::type;
+
+        /********
+         * plus *
+         ********/
+
+        namespace detail
+        {
+            template <class... T>
+            struct plus_impl;
+
+            template <>
+            struct plus_impl<> : size_t_<0>
+            {
+            };
+
+            template <class T1, class... T>
+            struct plus_impl<T1, T...> : size_t_<T1::value + plus_impl<T...>::value>
+            {
+            };
+        }
+
+        template <class... T>
+        struct plus : detail::plus_impl<T...>
+        {
+        };
+
+        /*********
+         * count *
+         *********/
+
+        namespace detail
+        {
+            template <class L, class V>
+            struct count_impl;
+
+            template <template <class...> class L, class... T, class V>
+            struct count_impl<L<T...>, V> : plus<std::is_same<T, V>...>
+            {
+            };
+        }
+
+        template <class L, class V>
+        struct count : detail::count_impl<L, V>
+        {
+        };
+
+        /************
+         * count_if *
+         ************/
+
+        namespace detail
+        {
+            template <class L, template <class> class P>
+            struct count_if_impl;
+
+            template <template <class...> class L, class... T, template <class> class P>
+            struct count_if_impl<L<T...>, P> : plus<P<T>...>
+            {
+            };
+        }
+
+        template <class L, template <class> class P>
+        struct count_if : detail::count_if_impl<L, P>
+        {
+        };
+
+        /************
+         * index_of *
+         ************/
+
+        namespace detail
+        {
+            template <class L, class V>
+            struct index_of_impl;
+
+            template <template <class...> class L, class V>
+            struct index_of_impl<L<>, V>
+            {
+                static constexpr size_t value = SIZE_MAX;
+            };
+
+            template <template <class...> class L, class... T, class V>
+            struct index_of_impl<L<V, T...>, V>
+            {
+                static constexpr size_t value = 0u;
+            };
+
+            template <template <class...> class L, class U, class... T, class V>
+            struct index_of_impl<L<U, T...>, V>
+            {
+                static constexpr size_t tmp = index_of_impl<L<T...>, V>::value;
+                static constexpr size_t value = tmp == SIZE_MAX ? SIZE_MAX : 1u + tmp;
+            };
+        }
+
+        template <class L, class T>
+        struct index_of : detail::index_of_impl<L, T>
+        {
+        };
+
+        /************
+         * contains *
+         ************/
+
+        namespace detail
+        {
+            template <class L, class V>
+            struct contains_impl;
+
+            template <template <class...> class L, class V>
+            struct contains_impl<L<>, V> : std::false_type
+            {
+            };
+
+            template <template <class...> class L, class... T, class V>
+            struct contains_impl<L<V, T...>, V> : std::true_type
+            {
+            };
+
+            template <template <class...> class L, class U, class... T, class V>
+            struct contains_impl<L<U, T...>, V> : contains_impl<L<T...>, V>
+            {
+            };
+        }
+
+        template <class L, class V>
+        struct contains : detail::contains_impl<L, V>
+        {
+        };
+
+        /*********
+         * front *
+         *********/
+
+        namespace detail
+        {
+            template <class L>
+            struct front_impl;
+
+            template <template <class...> class L, class T, class... U>
+            struct front_impl<L<T, U...>>
+            {
+                using type = T;
+            };
+        }
+
+        template <class L>
+        struct front : detail::front_impl<L>
+        {
+        };
+
+        template <class L>
+        using front_t = typename front<L>::type;
+
+        /********
+         * back *
+         ********/
+
+        namespace detail
+        {
+            template <class L>
+            struct back_impl;
+
+            template <template <class...> class L, class T>
+            struct back_impl<L<T>>
+            {
+                using type = T;
+            };
+
+            // Compilation time improvement
+            template <template <class...> class L, class T1, class T2>
+            struct back_impl<L<T1, T2>>
+            {
+                using type = T2;
+            };
+
+            template <template <class...> class L, class T1, class T2, class T3>
+            struct back_impl<L<T1, T2, T3>>
+            {
+                using type = T3;
+            };
+
+            template <template <class...> class L, class T1, class T2, class T3, class T4>
+            struct back_impl<L<T1, T2, T3, T4>>
+            {
+                using type = T4;
+            };
+
+            template <template <class...> class L, class T, class... U>
+            struct back_impl<L<T, U...>> : back_impl<L<U...>>
+            {
+            };
+        }
+
+        template <class L>
+        struct back : detail::back_impl<L>
+        {
+        };
+
+        template <class L>
+        using back_t = typename back<L>::type;
+
+        /**************
+         * push_front *
+         **************/
+
+        namespace detail
+        {
+            template <class L, class... T>
+            struct push_front_impl;
+
+            template <template <class...> class L, class... U, class... T>
+            struct push_front_impl<L<U...>, T...>
+            {
+                using type = L<T..., U...>;
+            };
+        }
+
+        template <class L, class... T>
+        struct push_front : detail::push_front_impl<L, T...>
+        {
+        };
+
+        template <class L, class... T>
+        using push_front_t = typename push_front<L, T...>::type;
+
+        /*************
+         * push_back *
+         *************/
+
+        namespace detail
+        {
+            template <class L, class... T>
+            struct push_back_impl;
+
+            template <template <class...> class L, class... U, class... T>
+            struct push_back_impl<L<U...>, T...>
+            {
+                using type = L<U..., T...>;
+            };
+        }
+
+        template <class L, class... T>
+        struct push_back : detail::push_back_impl<L, T...>
+        {
+        };
+
+        template <class L, class... T>
+        using push_back_t = typename push_back<L, T...>::type;
+
+        /*************
+         * pop_front *
+         *************/
+
+        namespace detail
+        {
+            template <class L>
+            struct pop_front_impl;
+
+            template <template <class...> class L, class T, class... U>
+            struct pop_front_impl<L<T, U...>>
+            {
+                using type = L<U...>;
+            };
+        }
+
+        template <class L>
+        struct pop_front : detail::pop_front_impl<L>
+        {
+        };
+
+        template <class L>
+        using pop_front_t = typename pop_front<L>::type;
+
+        /*************
+         * transform *
+         *************/
+
+        namespace detail
+        {
+            template <template <class...> class F, class L>
+            struct transform_impl;
+
+            // Applies the metafunction F element-wise to the list L<T...>.
+            template <template <class...> class F, template <class...> class L, class... T>
+            struct transform_impl<F, L<T...>>
+            {
+                using type = L<F<T>...>;
+            };
+        }
+
+        // Metafunction: `type` is L with each element T replaced by F<T>.
+        template <template <class...> class F, class L>
+        struct transform : detail::transform_impl<F, L>
+        {
+        };
+
+        // Convenience alias for transform<F, L>::type.
+        template <template <class...> class F, class L>
+        using transform_t = typename transform<F, L>::type;
+
+        /*************
+         * merge_set *
+         *************/
+
+        namespace detail
+        {
+            template <class S1, class S2>
+            struct merge_set_impl;
+
+            // Recursion end: nothing left to merge from the second list.
+            template <template <class...> class L, class... T>
+            struct merge_set_impl<L<T...>, L<>>
+            {
+                using type = L<T...>;
+            };
+
+            // Appends U1 to the accumulator only if it is not already
+            // contained in it, then recurses on the remaining U... elements.
+            template <template <class...> class L, class... T, class U1, class... U>
+            struct merge_set_impl<L<T...>, L<U1, U...>>
+            {
+                using type = typename merge_set_impl<if_t<contains<L<T...>, U1>,
+                                                          L<T...>,
+                                                          L<T..., U1>>,
+                                                     L<U...>>::type;
+            };
+        }
+
+        // Metafunction: set union of S1 and S2 — S1 followed by those
+        // elements of S2 not already present, preserving order.
+        template <class S1, class S2>
+        struct merge_set : detail::merge_set_impl<S1, S2>
+        {
+        };
+
+        // Convenience alias for merge_set<S1, S2>::type.
+        template <class S1, class S2>
+        using merge_set_t = typename merge_set<S1, S2>::type;
+
+        /***********
+         * find_if *
+         ***********/
+
+        // Metafunction: index of the first element T of L for which
+        // Test<T>::value is true; equals the size of L when no element matches.
+        template <template <class> class Test, class L>
+        struct find_if;
+
+        namespace detail
+        {
+            template <template <class> class Test, std::size_t I, class... T>
+            struct find_if_impl;
+
+            // List exhausted: yield the running index (i.e. the list size).
+            template <template <class> class Test, std::size_t I>
+            struct find_if_impl<Test, I> : size_t_<I>
+            {
+            };
+
+            // Yield I when T0 satisfies Test, otherwise recurse with I + 1.
+            template <template <class> class Test, std::size_t I, class T0, class... T>
+            struct find_if_impl<Test, I, T0, T...> : std::conditional_t<Test<T0>::value,
+                                                                        size_t_<I>,
+                                                                        find_if_impl<Test, I + 1, T...>>
+            {
+            };
+        }
+
+        template <template <class> class Test, template <class...> class L, class... T>
+        struct find_if<Test, L<T...>> : detail::find_if_impl<Test, 0, T...>
+        {
+        };
+
+        /*********
+         * split *
+         *********/
+
+        namespace detail
+        {
+            // Moves N elements one at a time from the front of L2 to the back
+            // of L1; `first_type` / `second_type` expose the final lists.
+            template <std::size_t N, class L1, class L2>
+            struct transfer
+            {
+                using new_l1 = push_back_t<L1, front_t<L2>>;
+                using new_l2 = pop_front_t<L2>;
+                using new_transfer = transfer<N - 1, new_l1, new_l2>;
+                using first_type = typename new_transfer::first_type;
+                using second_type = typename new_transfer::second_type;
+            };
+
+            // Recursion end: no elements left to transfer.
+            template <class L1, class L2>
+            struct transfer<0, L1, L2>
+            {
+                using first_type = L1;
+                using second_type = L2;
+            };
+
+            template <std::size_t N, class L>
+            struct split_impl
+            {
+                // Note: the accumulator is mpl::vector<>, so first_type is
+                // always a vector regardless of the template wrapping L.
+                using tr_type = transfer<N, vector<>, L>;
+                using first_type = typename tr_type::first_type;
+                using second_type = typename tr_type::second_type;
+            };
+        }
+
+        // Metafunction: splits L after its first N elements; `first_type`
+        // holds those N elements, `second_type` the remainder of L.
+        template <std::size_t N, class L>
+        struct split : detail::split_impl<N, L>
+        {
+        };
+
+        /**********
+         * unique *
+         **********/
+
+        namespace detail
+        {
+            template <class L>
+            struct unique_impl;
+
+            // Merging L into an empty set drops duplicates while keeping the
+            // first occurrence of each element in order.
+            template <template <class...> class L, class... T>
+            struct unique_impl<L<T...>>
+            {
+                using type = merge_set_t<L<>, L<T...>>;
+            };
+        }
+
+        // Metafunction: `type` is L with duplicate elements removed.
+        template <class L>
+        struct unique : detail::unique_impl<L>
+        {
+        };
+
+        // Convenience alias for unique<L>::type.
+        template <class L>
+        using unique_t = typename unique<L>::type;
+
+        /*************
+         * static_if *
+         *************/
+
+        // Compile-time branch selection: only the selected callable is
+        // invoked. Each callable receives an identity functor, which can be
+        // used to make expressions in the branch body dependent.
+        template <class TF, class FF>
+        decltype(auto) static_if(std::true_type, const TF& tf, const FF&)
+        {
+            return tf(identity());
+        }
+
+        template <class TF, class FF>
+        decltype(auto) static_if(std::false_type, const TF&, const FF& ff)
+        {
+            return ff(identity());
+        }
+
+        // Convenience overload taking the condition as a bool template
+        // parameter instead of a std::true_type / std::false_type tag.
+        template <bool cond, class TF, class FF>
+        decltype(auto) static_if(const TF& tf, const FF& ff)
+        {
+            return static_if(std::integral_constant<bool, cond>(), tf, ff);
+        }
+
+        /***********
+         * switch_ *
+         ***********/
+
+        // Marker for the default case of switch_.
+        using default_t = std::true_type;
+
+        namespace detail
+        {
+            template <class... T>
+            struct switch_impl;
+
+            // General case on (condition, type) pairs: picks T when C::value
+            // is true, otherwise recurses on the remaining pairs.
+            template <class C, class T, class... U>
+            struct switch_impl<C, T, U...>
+                : std::conditional<C::value, T, typename switch_impl<U...>::type>
+            {
+            };
+
+            // Terminal case: the trailing (default_t, U) pair ends the
+            // recursion, with U as the fallback result.
+            template <class C, class T, class U>
+            struct switch_impl<C, T, default_t, U>
+                : std::conditional<C::value, T, U>
+            {
+            };
+        }
+
+        // Chained compile-time selection over (condition, type) pairs,
+        // terminated by a (default_t, DefaultType) pair.
+        template <class... T>
+        struct switch_ : detail::switch_impl<T...>
+        {
+        };
+
+        // Convenience alias for switch_<T...>::type.
+        template <class... T>
+        using switch_t = typename switch_<T...>::type;
+    }
+}
+
+#endif

+ 422 - 0
3rd/numpy/include/xtl/xmultimethods.hpp

@@ -0,0 +1,422 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_MULTIMETHODS_HPP
+#define XTL_MULTIMETHODS_HPP
+
+#include <array>
+#include <cstdint>
+#include <functional>
+#include <map>
+#include <typeindex>
+#include <type_traits>
+#include <vector>
+
+#include "xmeta_utils.hpp"
+
+namespace xtl
+{
+    // Loki's multimethods ported to modern C++ and generalized to N arguments
+    // Original implementation can be found at
+    // https://github.com/snaewe/loki-lib/blob/master/include/loki/MultiMethods.h
+
+    // Tag types selecting the dispatch symmetry: with symmetric_dispatch a
+    // handler registered for (A, B) also serves (B, A) with swapped
+    // arguments; antisymmetric_dispatch requires separate registrations.
+    struct symmetric_dispatch {};
+    struct antisymmetric_dispatch {};
+
+    /*********************
+     * static_dispatcher *
+     *********************/
+
+    // Double dispatcher resolved without registration tables: walks
+    // lhs_type_list, then rhs_type_list, with dynamic_cast until the runtime
+    // types of both arguments are found, and calls executor::run on the
+    // downcast pair. Types not present in the lists fall back to
+    // executor::on_error. With symmetric == symmetric_dispatch, a (lhs, rhs)
+    // pair whose rhs type comes earlier in the list than the lhs type reuses
+    // the mirrored handler by swapping the arguments.
+    template
+    <
+        class executor,
+        class base_lhs,
+        class lhs_type_list,
+        class return_type = void,
+        class symmetric = antisymmetric_dispatch,
+        class base_rhs = base_lhs,
+        class rhs_type_list = lhs_type_list
+    >
+    class static_dispatcher
+    {
+    private:
+
+        // Direct invocation: argument order matches the handler signature.
+        template <class lhs_type, class rhs_type>
+        static return_type invoke_executor(lhs_type& lhs,
+                                           rhs_type& rhs,
+                                           executor& exec,
+                                           std::false_type)
+        {
+            return exec.run(lhs, rhs);
+        }
+
+        // Swapped invocation used for symmetric dispatch.
+        template <class lhs_type, class rhs_type>
+        static return_type invoke_executor(lhs_type& lhs,
+                                           rhs_type& rhs,
+                                           executor& exec,
+                                           std::true_type)
+        {
+            return exec.run(rhs, lhs);
+        }
+
+        // rhs type list exhausted: report failure to the executor.
+        template <class lhs_type>
+        static return_type dispatch_rhs(lhs_type& lhs,
+                                        base_rhs& rhs,
+                                        executor& exec,
+                                        mpl::vector<>)
+        {
+            return exec.on_error(lhs, rhs);
+        }
+
+        // Tries each candidate rhs type in turn via dynamic_cast.
+        template <class lhs_type, class T, class... U>
+        static return_type dispatch_rhs(lhs_type& lhs,
+                                        base_rhs& rhs,
+                                        executor& exec,
+                                        mpl::vector<T, U...>)
+        {
+            if (T* p = dynamic_cast<T*>(&rhs))
+            {
+                constexpr std::size_t lhs_index = mpl::index_of<lhs_type_list, lhs_type>::value;
+                constexpr std::size_t rhs_index = mpl::index_of<rhs_type_list, T>::value;
+
+                // Swap the arguments only for the "upper triangle" of the
+                // type pair matrix when symmetric dispatch is requested.
+                using invoke_flag = std::integral_constant<bool,
+                    std::is_same<symmetric, symmetric_dispatch>::value && (rhs_index < lhs_index)>;
+                return invoke_executor(lhs, *p, exec, invoke_flag());
+            }
+            return dispatch_rhs(lhs, rhs, exec, mpl::vector<U...>());
+        }
+
+        // lhs type list exhausted: report failure to the executor.
+        static return_type dispatch_lhs(base_lhs& lhs,
+                                        base_rhs& rhs,
+                                        executor& exec,
+                                        mpl::vector<>)
+        {
+            return exec.on_error(lhs, rhs);
+        }
+
+        // Tries each candidate lhs type, then resolves the rhs type.
+        template <class T, class... U>
+        static return_type dispatch_lhs(base_lhs& lhs,
+                                        base_rhs& rhs,
+                                        executor& exec,
+                                        mpl::vector<T, U...>)
+        {
+            if (T* p = dynamic_cast<T*>(&lhs))
+            {
+                return dispatch_rhs(*p, rhs, exec, rhs_type_list());
+            }
+            return dispatch_lhs(lhs, rhs, exec, mpl::vector<U...>());
+        }
+
+    public:
+
+        // Resolves the dynamic types of lhs and rhs and invokes the executor.
+        static return_type dispatch(base_lhs& lhs, base_rhs& rhs, executor& exec)
+        {
+            return dispatch_lhs(lhs, rhs, exec, lhs_type_list());
+        }
+    };
+
+    // TODO: generalize to N-D with mpl::vector of mpl:vector
+    // Warning: this is hardcore ;)
+
+    /********************
+     * basic_dispatcher *
+     ********************/
+
+    // Runtime N-ary dispatcher backed by a std::map keyed on the
+    // std::type_index of each dispatched argument. B... are the dispatched
+    // base types; T... are extra arguments forwarded to the callback
+    // untouched.
+    template
+    <
+        class type_list,
+        class return_type,
+        class undispatched_type_list,
+        class callback_type
+    >
+    class basic_dispatcher;
+
+    template
+    <
+        class return_type,
+        class callback_type,
+        class... B,
+        class... T
+    >
+    class basic_dispatcher<mpl::vector<B...>, return_type, mpl::vector<T...>, callback_type>
+    {
+    private:
+
+        // One type_index per dispatched argument forms the lookup key.
+        using key_type = std::array<std::type_index, sizeof...(B)>;
+        using map_type = std::map<key_type, callback_type>;
+        map_type m_callback_map;
+
+        template <class... U>
+        key_type make_key() const
+        {
+            return {{std::type_index(typeid(U))...}};
+        }
+
+    public:
+
+        // Registers (or overwrites) the callback for the static types D....
+        template <class... D>
+        void insert(callback_type&& cb)
+        {
+            static_assert(sizeof...(D) == sizeof...(B),
+                          "Number of callback arguments must match dispatcher dimension");
+            m_callback_map[make_key<D...>()] = std::move(cb);
+        }
+
+        // Removes the callback registered for the static types D..., if any.
+        template <class... D>
+        void erase()
+        {
+            static_assert(sizeof...(D) == sizeof...(B),
+                          "Number of callback arguments must match dispatcher dimension");
+            m_callback_map.erase(make_key<D...>());
+        }
+
+        // Looks up the callback matching the dynamic types of args... and
+        // invokes it; throws std::runtime_error when none is registered.
+        inline return_type dispatch(B&... args, T&... udargs) const
+        {
+            key_type k = {{std::type_index(typeid(args))...}};
+            auto it = m_callback_map.find(k);
+            if (it == m_callback_map.end())
+            {
+                XTL_THROW(std::runtime_error, "callback not found");
+            }
+            return (it->second)(args..., udargs...);
+        }
+    };
+
+    /*************************
+     * basic_fast_dispatcher *
+     *************************/
+
+// Mixin macro giving a class a mutable per-class static index (SIZE_MAX
+// until assigned by basic_fast_dispatcher::insert) and a virtual accessor
+// returning the index of the object's dynamic class.
+#define XTL_IMPLEMENT_INDEXABLE_CLASS()         \
+    static std::size_t& get_class_static_index()\
+    {                                           \
+        static std::size_t index = SIZE_MAX;    \
+        return index;                           \
+    }                                           \
+    virtual std::size_t get_class_index() const \
+    {                                           \
+        return get_class_static_index();        \
+    }
+
+    namespace detail
+    {
+        // std::vector restricted to the operations the fast dispatcher
+        // needs (indexing, size, resize); inheritance is private so the
+        // full vector interface is not exposed.
+        template <class T>
+        class recursive_container_impl : private std::vector<T>
+        {
+        public:
+
+            using base_type = std::vector<T>;
+
+            using base_type::base_type;
+            using base_type::operator[];
+            using base_type::size;
+            using base_type::resize;
+        };
+
+        // `level`-deep nesting of vectors: recursive_container<C, N> is a
+        // vector of recursive_container<C, N-1>, bottoming out at callbacks.
+        template <class callback_type, std::size_t level>
+        class recursive_container
+            : public recursive_container_impl<recursive_container<callback_type, level-1>>
+        {
+        };
+
+        // Recursion end: a flat vector of callbacks.
+        template <class callback>
+        class recursive_container<callback, 0>
+            : public recursive_container_impl<callback>
+        {
+        };
+    }
+
+    // Index-based N-ary dispatcher: callbacks are stored in nested vectors
+    // addressed by per-class indices that each dispatched class exposes via
+    // XTL_IMPLEMENT_INDEXABLE_CLASS. Lookup is a chain of vector indexing
+    // instead of a map search.
+    template
+    <
+        class type_list,
+        class return_type,
+        class undispatched_type_list,
+        class callback_type
+    >
+    class basic_fast_dispatcher;
+
+    template
+    <
+        class return_type,
+        class callback_type,
+        class... B,
+        class... T
+    >
+    class basic_fast_dispatcher<mpl::vector<B...>, return_type, mpl::vector<T...>, callback_type>
+    {
+    private:
+
+        static constexpr std::size_t nb_args = sizeof...(B);
+
+        // One nesting level per dispatched argument.
+        using storage_type = detail::recursive_container<callback_type, sizeof...(B) - 1>;
+        using index_type = std::array<std::size_t, nb_args>;
+        using index_ref_type = std::array<std::reference_wrapper<std::size_t>, nb_args>;
+
+        storage_type m_callbacks;
+        std::size_t m_next_index;
+
+        // Assigns a fresh class index when idx is still the SIZE_MAX
+        // sentinel, and grows the container so that c[idx] is addressable.
+        template <std::size_t I, class C>
+        void resize_container(C& c, const index_ref_type& index)
+        {
+            std::size_t& idx = index[I];
+            if (idx == SIZE_MAX)
+            {
+                c.resize(++m_next_index);
+                idx = c.size() - 1u;
+            }
+            else if(c.size() <= idx)
+            {
+                c.resize(idx + 1u);
+            }
+        }
+
+        // Innermost level: store the callback at the resolved slot.
+        template <std::size_t I, class C>
+        std::enable_if_t<I + 1 == nb_args>
+        insert_impl(callback_type&& cb, C& c, const index_ref_type& index)
+        {
+            resize_container<I>(c, index);
+            c[index[I]] = std::move(cb);
+        }
+
+        // Intermediate level: descend into the nested container.
+        template <std::size_t I, class C>
+        std::enable_if_t<I + 1 != nb_args>
+        insert_impl(callback_type&& cb, C& c, const index_ref_type& index)
+        {
+            resize_container<I>(c, index);
+            insert_impl<I+1>(std::move(cb), c[index[I]], index);
+        }
+
+        // Out-of-range index means no callback was registered for the type.
+        template <std::size_t I, class C>
+        void check_size(C& c, const index_type& index) const
+        {
+            if (index[I] >= c.size())
+            {
+                XTL_THROW(std::runtime_error, "callback not found");
+            }
+        }
+
+        // Innermost level: invoke the stored callback.
+        template <std::size_t I, class C>
+        std::enable_if_t<I + 1 == nb_args, return_type>
+        dispatch_impl(C& c, const index_type& index, B&... args, T&... udargs) const
+        {
+            check_size<I>(c, index);
+            return c[index[I]](args..., udargs...);
+        }
+
+        // Intermediate level: descend into the nested container.
+        template <std::size_t I, class C>
+        std::enable_if_t<I + 1 != nb_args, return_type>
+        dispatch_impl(C& c, const index_type& index, B&... args, T&... udargs) const
+        {
+            check_size<I>(c, index);
+            return dispatch_impl<I+1>(c[index[I]], index, args..., udargs...);
+        }
+
+    public:
+
+        inline basic_fast_dispatcher()
+            : m_next_index(0)
+        {
+        }
+
+        // Registers the callback for the static types D..., lazily assigning
+        // class indices through their get_class_static_index().
+        template <class... D>
+        void insert(callback_type&& cb)
+        {
+            static_assert(sizeof...(D) == sizeof...(B),
+                          "Number of callback arguments must match dispatcher dimension");
+            index_ref_type index = {{std::ref(D::get_class_static_index())...}};
+            insert_impl<0>(std::move(cb), m_callbacks, index);
+        }
+
+        // Resolves the dynamic class indices of args... and invokes the
+        // matching callback; throws std::runtime_error when none is found.
+        inline return_type dispatch(B&... args, T&... udargs) const
+        {
+            index_type index = {{args.get_class_index()...}};
+            return dispatch_impl<0>(m_callbacks, index, args..., udargs...);
+        }
+    };
+
+    /******************************
+     * dynamic and static casters *
+     ******************************/
+
+    // Casting policy: unchecked static_cast downcast from F to T.
+    template <class T, class F>
+    struct static_caster
+    {
+        static T& cast(F& f)
+        {
+            return static_cast<T&>(f);
+        }
+    };
+
+    // Casting policy: checked dynamic_cast downcast from F to T (a
+    // reference dynamic_cast throws std::bad_cast on mismatch).
+    template <class T, class F>
+    struct dynamic_caster
+    {
+        static T& cast(F& f)
+        {
+            return dynamic_cast<T&>(f);
+        }
+    };
+
+    /**********************
+     * functor_dispatcher *
+     **********************/
+
+    // Type-erasing front-end over a dispatcher backend (map-based
+    // basic_dispatcher by default, or basic_fast_dispatcher). Callables are
+    // stored as std::function; each dispatched argument is downcast from its
+    // base type B to the registered concrete type D via casting_policy.
+    template
+    <
+        class type_list,
+        class return_type,
+        class undispatched_type = mpl::vector<>,
+        template <class, class> class casting_policy = dynamic_caster,
+        template <class, class, class, class> class dispatcher = basic_dispatcher
+    >
+    class functor_dispatcher;
+
+    template
+    <
+        class return_type,
+        template <class, class> class casting_policy,
+        template <class, class, class, class> class dispatcher,
+        class... B,
+        class... T
+    >
+    class functor_dispatcher<mpl::vector<B...>, return_type, mpl::vector<T...>, casting_policy, dispatcher>
+    {
+    private:
+
+        using functor_type = std::function<return_type (B&..., T&...)>;
+        using backend = dispatcher<mpl::vector<B...>,
+                                   return_type,
+                                   mpl::vector<T...>,
+                                   functor_type>;
+        backend m_backend;
+
+    public:
+
+        // Registers `fun` for the concrete types D...; the stored wrapper
+        // copies `fun` and downcasts each base argument before calling it.
+        template <class... D, class Fun>
+        void insert(const Fun& fun)
+        {
+            functor_type f([fun](B&... args, T&... udargs) -> return_type
+            {
+                return fun(casting_policy<D&, B&>::cast(args)..., udargs...);
+            });
+            m_backend.template insert<D...>(std::move(f));
+        }
+
+        // Removes the handler registered for D... from the backend.
+        template <class... D>
+        void erase()
+        {
+            m_backend.template erase<D...>();
+        }
+
+        // Forwards to the backend's runtime dispatch.
+        inline return_type dispatch(B&... args, T&... udargs) const
+        {
+            return m_backend.dispatch(args..., udargs...);
+        }
+    };
+}
+
+#endif

+ 1331 - 0
3rd/numpy/include/xtl/xoptional.hpp

@@ -0,0 +1,1331 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_OPTIONAL_HPP
+#define XTL_OPTIONAL_HPP
+
+#include <cmath>
+#include <ostream>
+#include <type_traits>
+#include <utility>
+
+#ifdef __CLING__
+#include <nlohmann/json.hpp>
+#endif
+
+#include "xoptional_meta.hpp"
+#include "xclosure.hpp"
+#include "xfunctional.hpp"
+#include "xmeta_utils.hpp"
+#include "xtl_config.hpp"
+#include "xtype_traits.hpp"
+
+namespace xtl
+{
+    template <class T, class B>
+    auto optional(T&& t, B&& b) noexcept;
+
+    /************************
+     * optional declaration *
+     ************************/
+
+    /**
+     * @class xoptional
+     * @brief Optional value handler.
+     *
+     * The xoptional is an optional proxy. It holds a value (or a reference on a value) and a flag (or reference on a flag)
+     * indicating whether the element should be considered missing.
+     *
+     * xoptional is different from std::optional
+     *
+     *  - no `operator->()` that returns a pointer.
+     *  - no `operator*()` that returns a value.
+     *
+     * The only way to access the underlying value and flag is with the `value` and `value_or` methods.
+     *
+     *  - no explicit conversion to bool. This may lead to confusion when the underlying value type is boolean too.
+     *
+     * @tparam CT Closure type for the value.
+     * @tparam CB Closure type for the missing flag. A falsy flag means that the value is missing.
+     *
+     * \ref xoptional is used both as a value type (with CT and CB being value types) and reference type for containers
+     * with CT and CB being reference types. In other words, it serves as a reference proxy.
+     *
+     */
+    template <class CT, class CB>
+    class xoptional
+    {
+    public:
+
+        using self_type = xoptional<CT, CB>;
+        using value_closure = CT;
+        using flag_closure = CB;
+
+        using value_type = std::decay_t<CT>;
+        using flag_type = std::decay_t<CB>;
+
+        // Constructors
+        // Default: value-initialized value, flag false (missing).
+        inline xoptional()
+            : m_value(), m_flag(false)
+        {
+        }
+
+        // Implicit construction from a value; the flag is set to true.
+        // Participates only when T&& is convertible to CT.
+        template <class T,
+          std::enable_if_t<
+            conjunction<
+              negation<std::is_same<xoptional<CT, CB>, std::decay_t<T>>>,
+              std::is_constructible<CT, T&&>,
+              std::is_convertible<T&&, CT>
+            >::value,
+            bool
+          > = true>
+        inline constexpr xoptional(T&& rhs)
+            : m_value(std::forward<T>(rhs)), m_flag(true)
+        {
+        }
+
+        // Explicit counterpart: CT is constructible from T&& but the
+        // conversion is not implicit.
+        template <class T,
+          std::enable_if_t<
+            conjunction<
+              negation<std::is_same<xoptional<CT, CB>, std::decay_t<T>>>,
+              std::is_constructible<CT, T&&>,
+              negation<std::is_convertible<T&&, CT>>
+            >::value,
+            bool
+          > = false>
+        inline explicit constexpr xoptional(T&& value)
+            : m_value(std::forward<T>(value)), m_flag(true)
+        {
+        }
+
+        // Implicit converting copy from a compatible xoptional<CTO, CBO>.
+        template <class CTO, class CBO,
+          std::enable_if_t<
+            conjunction<
+              negation<std::is_same<xoptional<CT, CB>, xoptional<CTO, CBO>>>,
+              std::is_constructible<CT, std::add_lvalue_reference_t<std::add_const_t<CTO>>>,
+              std::is_constructible<CB, std::add_lvalue_reference_t<std::add_const_t<CBO>>>,
+              conjunction<
+                std::is_convertible<std::add_lvalue_reference_t<std::add_const_t<CTO>>, CT>,
+                std::is_convertible<std::add_lvalue_reference_t<std::add_const_t<CBO>>, CB>
+              >,
+              negation<detail::converts_from_xoptional<CT, CTO, CBO>>
+            >::value,
+            bool
+          > = true>
+        inline constexpr xoptional(const xoptional<CTO, CBO>& rhs)
+            : m_value(rhs.value()), m_flag(rhs.has_value())
+        {
+        }
+
+        // Explicit converting copy: used when either underlying conversion
+        // (value or flag) is explicit.
+        template <class CTO, class CBO,
+          std::enable_if_t<
+            conjunction<
+              negation<std::is_same<xoptional<CT, CB>, xoptional<CTO, CBO>>>,
+              std::is_constructible<CT, std::add_lvalue_reference_t<std::add_const_t<CTO>>>,
+              std::is_constructible<CB, std::add_lvalue_reference_t<std::add_const_t<CBO>>>,
+              disjunction<
+                negation<std::is_convertible<std::add_lvalue_reference_t<std::add_const_t<CTO>>, CT>>,
+                negation<std::is_convertible<std::add_lvalue_reference_t<std::add_const_t<CBO>>, CB>>
+              >,
+              negation<detail::converts_from_xoptional<CT, CTO, CBO>>
+            >::value,
+            bool
+          > = false>
+        inline explicit constexpr xoptional(const xoptional<CTO, CBO>& rhs)
+            : m_value(rhs.value()), m_flag(rhs.has_value())
+        {
+        }
+
+        // Implicit converting move from a compatible xoptional<CTO, CBO>.
+        template <class CTO, class CBO,
+          std::enable_if_t<
+            conjunction<
+              negation<std::is_same<xoptional<CT, CB>, xoptional<CTO, CBO>>>,
+              std::is_constructible<CT, std::conditional_t<std::is_reference<CT>::value, const std::decay_t<CTO>&, std::decay_t<CTO>&&>>,
+              std::is_constructible<CB, std::conditional_t<std::is_reference<CB>::value, const std::decay_t<CBO>&, std::decay_t<CBO>&&>>,
+              conjunction<
+                std::is_convertible<std::conditional_t<std::is_reference<CT>::value, const std::decay_t<CTO>&, std::decay_t<CTO>&&>, CT>,
+                std::is_convertible<std::conditional_t<std::is_reference<CB>::value, const std::decay_t<CBO>&, std::decay_t<CBO>&&>, CB>
+              >,
+              negation<detail::converts_from_xoptional<CT, CTO, CBO>>
+            >::value,
+            bool
+          > = true>
+        inline constexpr xoptional(xoptional<CTO, CBO>&& rhs)
+            : m_value(std::move(rhs).value()), m_flag(std::move(rhs).has_value())
+        {
+        }
+
+        // Explicit converting move counterpart.
+        template <class CTO, class CBO,
+          std::enable_if_t<
+            conjunction<
+              negation<std::is_same<xoptional<CT, CB>, xoptional<CTO, CBO>>>,
+              std::is_constructible<CT, std::conditional_t<std::is_reference<CT>::value, const std::decay_t<CTO>&, std::decay_t<CTO>&&>>,
+              std::is_constructible<CB, std::conditional_t<std::is_reference<CB>::value, const std::decay_t<CBO>&, std::decay_t<CBO>&&>>,
+              disjunction<
+                negation<std::is_convertible<std::conditional_t<std::is_reference<CT>::value, const std::decay_t<CTO>&, std::decay_t<CTO>&&>, CT>>,
+                negation<std::is_convertible<std::conditional_t<std::is_reference<CB>::value, const std::decay_t<CBO>&, std::decay_t<CBO>&&>, CB>>
+              >,
+              negation<detail::converts_from_xoptional<CT, CTO, CBO>>
+            >::value,
+            bool
+          > = false>
+        inline explicit constexpr xoptional(xoptional<CTO, CBO>&& rhs)
+            : m_value(std::move(rhs).value()), m_flag(std::move(rhs).has_value())
+        {
+        }
+
+        // Explicit (value, flag) pair constructors; defined out of line.
+        xoptional(value_type&&, flag_type&&);
+        xoptional(std::add_lvalue_reference_t<CT>, std::add_lvalue_reference_t<CB>);
+        xoptional(value_type&&, std::add_lvalue_reference_t<CB>);
+        xoptional(std::add_lvalue_reference_t<CT>, flag_type&&);
+
+        // Assignment
+        // Assigning a raw value engages the optional (flag := true).
+        template <class T>
+        std::enable_if_t<
+          conjunction<
+            negation<std::is_same<xoptional<CT, CB>, std::decay_t<T>>>,
+            std::is_assignable<std::add_lvalue_reference_t<CT>, T>
+          >::value,
+         xoptional&>
+        inline operator=(T&& rhs)
+        {
+            m_value = std::forward<T>(rhs);
+            m_flag = true;
+            return *this;
+        }
+
+        // Converting copy assignment from a compatible xoptional.
+        template <class CTO, class CBO>
+        std::enable_if_t<conjunction<
+          negation<std::is_same<xoptional<CT, CB>, xoptional<CTO, CBO>>>,
+          std::is_assignable<std::add_lvalue_reference_t<CT>, CTO>,
+          negation<detail::converts_from_xoptional<CT, CTO, CBO>>,
+          negation<detail::assigns_from_xoptional<CT, CTO, CBO>>
+        >::value,
+        xoptional&>
+        inline operator=(const xoptional<CTO, CBO>& rhs)
+        {
+            m_value = rhs.value();
+            m_flag = rhs.has_value();
+            return *this;
+        }
+
+        // Converting move assignment from a compatible xoptional.
+        template <class CTO, class CBO>
+        std::enable_if_t<conjunction<
+          negation<std::is_same<xoptional<CT, CB>, xoptional<CTO, CBO>>>,
+          std::is_assignable<std::add_lvalue_reference_t<CT>, CTO>,
+          negation<detail::converts_from_xoptional<CT, CTO, CBO>>,
+          negation<detail::assigns_from_xoptional<CT, CTO, CBO>>
+        >::value,
+        xoptional&>
+        inline operator=(xoptional<CTO, CBO>&& rhs)
+        {
+            m_value = std::move(rhs).value();
+            m_flag = std::move(rhs).has_value();
+            return *this;
+        }
+
+        // Operators
+        // Compound arithmetic / bitwise operators against another xoptional;
+        // declarations only, definitions are provided out of line.
+        template <class CTO, class CBO>
+        xoptional& operator+=(const xoptional<CTO, CBO>&);
+        template <class CTO, class CBO>
+        xoptional& operator-=(const xoptional<CTO, CBO>&);
+        template <class CTO, class CBO>
+        xoptional& operator*=(const xoptional<CTO, CBO>&);
+        template <class CTO, class CBO>
+        xoptional& operator/=(const xoptional<CTO, CBO>&);
+        template <class CTO, class CBO>
+        xoptional& operator%=(const xoptional<CTO, CBO>&);
+        template <class CTO, class CBO>
+        xoptional& operator&=(const xoptional<CTO, CBO>&);
+        template <class CTO, class CBO>
+        xoptional& operator|=(const xoptional<CTO, CBO>&);
+        template <class CTO, class CBO>
+        xoptional& operator^=(const xoptional<CTO, CBO>&);
+
+        // Compound operators against a plain (non-optional) operand.
+        template <class T, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T>)>
+        xoptional& operator+=(const T&);
+        template <class T, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T>)>
+        xoptional& operator-=(const T&);
+        template <class T, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T>)>
+        xoptional& operator*=(const T&);
+        template <class T, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T>)>
+        xoptional& operator/=(const T&);
+        template <class T, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T>)>
+        xoptional& operator%=(const T&);
+        template <class T, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T>)>
+        xoptional& operator&=(const T&);
+        template <class T, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T>)>
+        xoptional& operator|=(const T&);
+        template <class T, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T>)>
+        xoptional& operator^=(const T&);
+
+        // Access
+        // Value access, ref-qualified to preserve value category.
+        std::add_lvalue_reference_t<CT> value() & noexcept;
+        std::add_lvalue_reference_t<std::add_const_t<CT>> value() const & noexcept;
+        std::conditional_t<std::is_reference<CT>::value, apply_cv_t<CT, value_type>&, value_type> value() && noexcept;
+        std::conditional_t<std::is_reference<CT>::value, const value_type&, value_type> value() const && noexcept;
+
+        template <class U>
+        value_type value_or(U&&) const & noexcept;
+        template <class U>
+        value_type value_or(U&&) const && noexcept;
+
+        // Access
+        // Flag access: a falsy flag means the value is missing.
+        std::add_lvalue_reference_t<CB> has_value() & noexcept;
+        std::add_lvalue_reference_t<std::add_const_t<CB>> has_value() const & noexcept;
+        std::conditional_t<std::is_reference<CB>::value, apply_cv_t<CB, flag_type>&, flag_type> has_value() && noexcept;
+        std::conditional_t<std::is_reference<CB>::value, const flag_type&, flag_type> has_value() const && noexcept;
+
+        // Swap
+        void swap(xoptional& other);
+
+        // Comparison
+        template <class CTO, class CBO>
+        bool equal(const xoptional<CTO, CBO>& rhs) const noexcept;
+
+        template <class CTO, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<CTO>)>
+        bool equal(const CTO& rhs) const noexcept;
+
+        // address-of returns a closure pointer wrapper instead of a raw
+        // pointer, so the proxy semantics are preserved.
+        xclosure_pointer<self_type&> operator&() &;
+        xclosure_pointer<const self_type&> operator&() const &;
+        xclosure_pointer<self_type> operator&() &&;
+
+    private:
+
+        template <class CTO, class CBO>
+        friend class xoptional;
+
+        // Closure members: values or references depending on CT / CB.
+        CT m_value;
+        CB m_flag;
+    };
+
+    // value
+
+    // Free-function forms of value(): for a plain (non-optional) type the
+    // argument itself is returned, forwarded unchanged; for an xoptional the
+    // call dispatches to the member value(), preserving the value category
+    // (lvalue / const lvalue / rvalue) of the argument.
+
+    template <class T, class U = disable_xoptional<std::decay_t<T>>>
+    T&& value(T&& v)
+    {
+        return std::forward<T>(v);
+    }
+
+    template <class CT, class CB>
+    decltype(auto) value(xtl::xoptional<CT, CB>&& v)
+    {
+        return std::move(v).value();
+    }
+
+    template <class CT, class CB>
+    decltype(auto) value(xtl::xoptional<CT, CB>& v)
+    {
+        return v.value();
+    }
+
+    template <class CT, class CB>
+    decltype(auto) value(const xtl::xoptional<CT, CB>& v)
+    {
+        return v.value();
+    }
+
+    // has_value
+
+    // Free-function forms of has_value(): a plain (non-optional) value always
+    // reports true; for an xoptional the call dispatches to the member
+    // has_value(), preserving the value category of the argument.
+
+    template <class T, class U = disable_xoptional<std::decay_t<T>>>
+    bool has_value(T&&)
+    {
+        return true;
+    }
+
+    template <class CT, class CB>
+    decltype(auto) has_value(xtl::xoptional<CT, CB>&& v)
+    {
+        return std::move(v).has_value();
+    }
+
+    template <class CT, class CB>
+    decltype(auto) has_value(xtl::xoptional<CT, CB>& v)
+    {
+        return v.has_value();
+    }
+
+    template <class CT, class CB>
+    decltype(auto) has_value(const xtl::xoptional<CT, CB>& v)
+    {
+        return v.has_value();
+    }
+
+    /***************************************
+     * optional and missing implementation *
+     ***************************************/
+
+    /**
+     * @brief Returns an \ref xoptional holding closure types on the specified parameters
+     *
+     * @param t the optional value
+     * @param b the boolean flag
+     */
+    template <class T, class B>
+    inline auto optional(T&& t, B&& b) noexcept
+    {
+        using optional_type = xoptional<closure_type_t<T>, closure_type_t<B>>;
+        return optional_type(std::forward<T>(t), std::forward<B>(b));
+    }
+
+    /**
+     * @brief Returns an \ref xoptional for a missing value
+     *
+     * The result holds a value-initialized T together with a false flag.
+     */
+    template <class T>
+    xoptional<T, bool> missing() noexcept
+    {
+        return xoptional<T, bool>(T(), false);
+    }
+
+    /****************************
+     * xoptional implementation *
+     ****************************/
+
+    // Constructors
+    // Four overloads cover every rvalue/lvalue combination of (value, flag),
+    // so that closure template parameters CT / CB — which may themselves be
+    // lvalue-reference types — can be bound directly without forcing a copy.
+    template <class CT, class CB>
+    xoptional<CT, CB>::xoptional(value_type&& value, flag_type&& flag)
+        : m_value(std::move(value)), m_flag(std::move(flag))
+    {
+    }
+
+    template <class CT, class CB>
+    xoptional<CT, CB>::xoptional(std::add_lvalue_reference_t<CT> value, std::add_lvalue_reference_t<CB> flag)
+        : m_value(value), m_flag(flag)
+    {
+    }
+
+    template <class CT, class CB>
+    xoptional<CT, CB>::xoptional(value_type&& value, std::add_lvalue_reference_t<CB> flag)
+        : m_value(std::move(value)), m_flag(flag)
+    {
+    }
+
+    template <class CT, class CB>
+    xoptional<CT, CB>::xoptional(std::add_lvalue_reference_t<CT> value, flag_type&& flag)
+        : m_value(value), m_flag(std::move(flag))
+    {
+    }
+
+    // Operators
+
+    // Compound assignment with an xoptional rhs: the result is engaged only
+    // when both operands are engaged.  The flag is folded first, and the
+    // stored value is updated only when the combined flag is true — so a
+    // missing rhs leaves m_value untouched but marks *this as missing.
+    template <class CT, class CB>
+    template <class CTO, class CBO>
+    auto xoptional<CT, CB>::operator+=(const xoptional<CTO, CBO>& rhs) -> xoptional&
+    {
+        m_flag = m_flag && rhs.m_flag;
+        if (m_flag)
+        {
+            m_value += rhs.m_value;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class CTO, class CBO>
+    auto xoptional<CT, CB>::operator-=(const xoptional<CTO, CBO>& rhs) -> xoptional&
+    {
+        m_flag = m_flag && rhs.m_flag;
+        if (m_flag)
+        {
+            m_value -= rhs.m_value;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class CTO, class CBO>
+    auto xoptional<CT, CB>::operator*=(const xoptional<CTO, CBO>& rhs) -> xoptional&
+    {
+        m_flag = m_flag && rhs.m_flag;
+        if (m_flag)
+        {
+            m_value *= rhs.m_value;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class CTO, class CBO>
+    auto xoptional<CT, CB>::operator/=(const xoptional<CTO, CBO>& rhs) -> xoptional&
+    {
+        m_flag = m_flag && rhs.m_flag;
+        if (m_flag)
+        {
+            m_value /= rhs.m_value;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class CTO, class CBO>
+    auto xoptional<CT, CB>::operator%=(const xoptional<CTO, CBO>& rhs) -> xoptional&
+    {
+        m_flag = m_flag && rhs.m_flag;
+        if (m_flag)
+        {
+            m_value %= rhs.m_value;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class CTO, class CBO>
+    auto xoptional<CT, CB>::operator&=(const xoptional<CTO, CBO>& rhs) -> xoptional&
+    {
+        m_flag = m_flag && rhs.m_flag;
+        if (m_flag)
+        {
+            m_value &= rhs.m_value;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class CTO, class CBO>
+    auto xoptional<CT, CB>::operator|=(const xoptional<CTO, CBO>& rhs) -> xoptional&
+    {
+        m_flag = m_flag && rhs.m_flag;
+        if (m_flag)
+        {
+            m_value |= rhs.m_value;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class CTO, class CBO>
+    auto xoptional<CT, CB>::operator^=(const xoptional<CTO, CBO>& rhs) -> xoptional&
+    {
+        m_flag = m_flag && rhs.m_flag;
+        if (m_flag)
+        {
+            m_value ^= rhs.m_value;
+        }
+        return *this;
+    }
+
+    // Compound assignment with a plain (non-optional) rhs: the rhs is always
+    // treated as engaged, so the flag never changes and the whole operation
+    // is a no-op when *this is missing.
+    template <class CT, class CB>
+    template <class T, check_requires<is_not_xoptional_nor_xmasked_value<T>>>
+    auto xoptional<CT, CB>::operator+=(const T& rhs) -> xoptional&
+    {
+        if (m_flag)
+        {
+            m_value += rhs;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class T, check_requires<is_not_xoptional_nor_xmasked_value<T>>>
+    auto xoptional<CT, CB>::operator-=(const T& rhs) -> xoptional&
+    {
+        if (m_flag)
+        {
+            m_value -= rhs;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class T, check_requires<is_not_xoptional_nor_xmasked_value<T>>>
+    auto xoptional<CT, CB>::operator*=(const T& rhs) -> xoptional&
+    {
+        if (m_flag)
+        {
+            m_value *= rhs;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class T, check_requires<is_not_xoptional_nor_xmasked_value<T>>>
+    auto xoptional<CT, CB>::operator/=(const T& rhs) -> xoptional&
+    {
+        if (m_flag)
+        {
+            m_value /= rhs;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class T, check_requires<is_not_xoptional_nor_xmasked_value<T>>>
+    auto xoptional<CT, CB>::operator%=(const T& rhs) -> xoptional&
+    {
+        if (m_flag)
+        {
+            m_value %= rhs;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class T, check_requires<is_not_xoptional_nor_xmasked_value<T>>>
+    auto xoptional<CT, CB>::operator&=(const T& rhs) -> xoptional&
+    {
+        if (m_flag)
+        {
+            m_value &= rhs;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class T, check_requires<is_not_xoptional_nor_xmasked_value<T>>>
+    auto xoptional<CT, CB>::operator|=(const T& rhs) -> xoptional&
+    {
+        if (m_flag)
+        {
+            m_value |= rhs;
+        }
+        return *this;
+    }
+
+    template <class CT, class CB>
+    template <class T, check_requires<is_not_xoptional_nor_xmasked_value<T>>>
+    auto xoptional<CT, CB>::operator^=(const T& rhs) -> xoptional&
+    {
+        if (m_flag)
+        {
+            m_value ^= rhs;
+        }
+        return *this;
+    }
+
+    // Access
+
+    // value(): returns the stored payload unconditionally — unlike
+    // std::optional, the flag is not checked and nothing is thrown when the
+    // optional is missing.  The lvalue overloads return references; the
+    // rvalue overloads return by value unless CT is itself a reference type.
+    template <class CT, class CB>
+    auto xoptional<CT, CB>::value() & noexcept -> std::add_lvalue_reference_t<CT>
+    {
+        return m_value;
+    }
+
+    template <class CT, class CB>
+    auto xoptional<CT, CB>::value() const & noexcept -> std::add_lvalue_reference_t<std::add_const_t<CT>>
+    {
+        return m_value;
+    }
+
+    template <class CT, class CB>
+    auto xoptional<CT, CB>::value() && noexcept -> std::conditional_t<std::is_reference<CT>::value, apply_cv_t<CT, value_type>&, value_type>
+    {
+        return m_value;
+    }
+
+    template <class CT, class CB>
+    auto xoptional<CT, CB>::value() const && noexcept -> std::conditional_t<std::is_reference<CT>::value, const value_type&, value_type>
+    {
+        return m_value;
+    }
+
+    // value_or(): returns (by value) the payload when engaged, otherwise the
+    // supplied default.
+    template <class CT, class CB>
+    template <class U>
+    auto xoptional<CT, CB>::value_or(U&& default_value) const & noexcept -> value_type
+    {
+        return m_flag ? m_value : std::forward<U>(default_value);
+    }
+
+    template <class CT, class CB>
+    template <class U>
+    auto xoptional<CT, CB>::value_or(U&& default_value) const && noexcept -> value_type
+    {
+        return m_flag ? m_value : std::forward<U>(default_value);
+    }
+
+    // Access
+
+    // has_value(): exposes the flag member itself — for the lvalue overload
+    // this is a (possibly mutable) reference, not merely a bool conversion,
+    // which allows the engaged state to be assigned through it.
+    template <class CT, class CB>
+    auto xoptional<CT, CB>::has_value() & noexcept -> std::add_lvalue_reference_t<CB>
+    {
+        return m_flag;
+    }
+
+    template <class CT, class CB>
+    auto xoptional<CT, CB>::has_value() const & noexcept -> std::add_lvalue_reference_t<std::add_const_t<CB>>
+    {
+        return m_flag;
+    }
+
+    template <class CT, class CB>
+    auto xoptional<CT, CB>::has_value() && noexcept -> std::conditional_t<std::is_reference<CB>::value, apply_cv_t<CB, flag_type>&, flag_type>
+    {
+        return m_flag;
+    }
+
+    template <class CT, class CB>
+    auto xoptional<CT, CB>::has_value() const && noexcept -> std::conditional_t<std::is_reference<CB>::value, const flag_type&, flag_type>
+    {
+        return m_flag;
+    }
+
+    // Swap
+    // Member-wise swap of payload and flag.
+    template <class CT, class CB>
+    void xoptional<CT, CB>::swap(xoptional& other)
+    {
+        std::swap(m_value, other.m_value);
+        std::swap(m_flag, other.m_flag);
+    }
+
+    // Comparison
+    // Two optionals are equal when both are missing, or when both are engaged
+    // and their payloads compare equal; an engaged optional never equals a
+    // missing one.
+    template <class CT, class CB>
+    template <class CTO, class CBO>
+    auto xoptional<CT, CB>::equal(const xoptional<CTO, CBO>& rhs) const noexcept -> bool
+    {
+        return (!m_flag && !rhs.m_flag) || (m_value == rhs.m_value && (m_flag && rhs.m_flag));
+    }
+
+    // Against a plain value: equal only when engaged and payloads match; a
+    // missing optional never equals a plain value.
+    template <class CT, class CB>
+    template <class CTO, check_requires<is_not_xoptional_nor_xmasked_value<CTO>>>
+    bool xoptional<CT, CB>::equal(const CTO& rhs) const noexcept
+    {
+        return m_flag ? (m_value == rhs) : false;
+    }
+
+    // Address-of is deliberately overloaded: &opt yields an xclosure_pointer
+    // wrapping the optional (moving from an rvalue) rather than a raw
+    // pointer.
+    template <class CT, class CB>
+    inline auto xoptional<CT, CB>::operator&() & -> xclosure_pointer<self_type&>
+    {
+        return xclosure_pointer<self_type&>(*this);
+    }
+
+    template <class CT, class CB>
+    inline auto xoptional<CT, CB>::operator&() const & -> xclosure_pointer<const self_type&>
+    {
+        return xclosure_pointer<const self_type&>(*this);
+    }
+
+    template <class CT, class CB>
+    inline auto xoptional<CT, CB>::operator&() && -> xclosure_pointer<self_type>
+    {
+        return xclosure_pointer<self_type>(std::move(*this));
+    }
+
+    // External operators
+
+    // Stream insertion: prints the payload when engaged, the literal "N/A"
+    // when missing.
+    template <class T, class B, class OC, class OT>
+    inline std::basic_ostream<OC, OT>& operator<<(std::basic_ostream<OC, OT>& out, const xoptional<T, B>& v)
+    {
+        if (v.has_value())
+        {
+            out << v.value();
+        }
+        else
+        {
+            out << "N/A";
+        }
+        return out;
+    }
+
+#ifdef __CLING__
+    // Jupyter/cling rich display: builds a text/plain mime bundle via the
+    // stream operator above.
+    template <class T, class B>
+    nlohmann::json mime_bundle_repr(const xoptional<T, B>& v)
+    {
+        auto bundle = nlohmann::json::object();
+        std::stringstream tmp;
+        tmp << v;
+        bundle["text/plain"] = tmp.str();
+        return bundle;
+    }
+#endif
+
+    // Equality and inequality delegate to xoptional::equal and return a plain
+    // bool (two missing optionals compare equal) — in contrast with the
+    // ordering operators, which return xoptional<bool>.
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator==(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> bool
+    {
+        return e1.equal(e2);
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline bool operator==(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+    {
+        return e1.equal(e2);
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline bool operator==(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+    {
+        return e2.equal(e1);
+    }
+
+    // Unary plus: the identity operation, returning a value copy of the
+    // operand with decayed value type.
+    template <class T, class B>
+    inline auto operator+(const xoptional<T, B>& e) noexcept
+        -> xoptional<std::decay_t<T>>
+    {
+        return e;
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator!=(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> bool
+    {
+        return !e1.equal(e2);
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline bool operator!=(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+    {
+        return !e1.equal(e2);
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline bool operator!=(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+    {
+        return !e2.equal(e1);
+    }
+
+    // Operations
+
+    // Arithmetic operators are missing-propagating: when every optional
+    // operand is engaged the payloads are combined, otherwise the result is
+    // missing<common_type>().  Each binary operator comes in three overloads:
+    // optional/optional, plain/optional and optional/plain.
+    template <class T, class B>
+    inline auto operator-(const xoptional<T, B>& e) noexcept
+        -> xoptional<std::decay_t<T>>
+    {
+        using value_type = std::decay_t<T>;
+        return e.has_value() ? -e.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator+(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() && e2.has_value() ? e1.value() + e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator+(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e2.has_value() ? e1 + e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator+(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() ? e1.value() + e2 : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator-(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() && e2.has_value() ? e1.value() - e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator-(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e2.has_value() ? e1 - e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator-(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() ? e1.value() - e2 : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator*(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() && e2.has_value() ? e1.value() * e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator*(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e2.has_value() ? e1 * e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator*(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() ? e1.value() * e2 : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator/(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() && e2.has_value() ? e1.value() / e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator/(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e2.has_value() ? e1 / e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator/(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() ? e1.value() / e2 : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator%(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() && e2.has_value() ? e1.value() % e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator%(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e2.has_value() ? e1 % e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator%(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() ? e1.value() % e2 : missing<value_type>();
+    }
+
+    // Bitwise operators (~, &, |, ^): missing-propagating — payloads are
+    // combined only when every optional operand is engaged, otherwise the
+    // result is missing.
+    template <class T, class B>
+    inline auto operator~(const xoptional<T, B>& e) noexcept
+        -> xoptional<std::decay_t<T>>
+    {
+        using value_type = std::decay_t<T>;
+        return e.has_value() ? ~e.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator&(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() && e2.has_value() ? e1.value() & e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator&(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e2.has_value() ? e1 & e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator&(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() ? e1.value() & e2 : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator|(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() && e2.has_value() ? e1.value() | e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator|(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e2.has_value() ? e1 | e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator|(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() ? e1.value() | e2 : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator^(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() && e2.has_value() ? e1.value() ^ e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator^(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e2.has_value() ? e1 ^ e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator^(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() ? e1.value() ^ e2 : missing<value_type>();
+    }
+
+    // Logical operators: unlike the built-in && and ||, these do NOT
+    // short-circuit — both operands are always evaluated, and the result is
+    // missing whenever either optional operand is missing.
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator||(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() && e2.has_value() ? e1.value() || e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator||(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e2.has_value() ? e1 || e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator||(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() ? e1.value() || e2 : missing<value_type>();
+    }
+
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator&&(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() && e2.has_value() ? e1.value() && e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator&&(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e2.has_value() ? e1 && e2.value() : missing<value_type>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator&&(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> common_optional_t<T1, T2>
+    {
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;
+        return e1.has_value() ? e1.value() && e2 : missing<value_type>();
+    }
+
+    // Logical negation: returns xoptional<bool>, missing when the operand is
+    // missing.
+    template <class T, class B>
+    inline auto operator!(const xoptional<T, B>& e) noexcept
+        -> xoptional<bool>
+    {
+        return e.has_value() ? !e.value() : missing<bool>();
+    }
+
+    // Ordering comparisons (<, <=, >, >=): return xoptional<bool>, missing
+    // when either optional operand is missing — in contrast with == and !=,
+    // which return a plain bool.
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator<(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e1.has_value() && e2.has_value() ? e1.value() < e2.value() : missing<bool>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator<(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e2.has_value() ? e1 < e2.value() : missing<bool>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator<(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e1.has_value() ? e1.value() < e2 : missing<bool>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator<=(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e1.has_value() && e2.has_value() ? e1.value() <= e2.value() : missing<bool>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator<=(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e2.has_value() ? e1 <= e2.value() : missing<bool>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator<=(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e1.has_value() ? e1.value() <= e2 : missing<bool>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator>(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e1.has_value() && e2.has_value() ? e1.value() > e2.value() : missing<bool>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator>(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e2.has_value() ? e1 > e2.value() : missing<bool>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator>(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e1.has_value() ? e1.value() > e2 : missing<bool>();
+    }
+
+    template <class T1, class B1, class T2, class B2>
+    inline auto operator>=(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e1.has_value() && e2.has_value() ? e1.value() >= e2.value() : missing<bool>();
+    }
+
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)>
+    inline auto operator>=(const T1& e1, const xoptional<T2, B2>& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e2.has_value() ? e1 >= e2.value() : missing<bool>();
+    }
+
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)>
+    inline auto operator>=(const xoptional<T1, B1>& e1, const T2& e2) noexcept
+        -> xoptional<bool>
+    {
+        return e1.has_value() ? e1.value() >= e2 : missing<bool>();
+    }
+
+// Generator macros that lift a named function (the `using std::NAME;` line
+// brings the std overload into ADL-style lookup, so <cmath>-like functions
+// can be wrapped) into missing-propagating overloads for xoptional arguments:
+//   UNARY_OPTIONAL       f(optional) -> optional of the decayed result type
+//   UNARY_BOOL_OPTIONAL  f(optional) -> xoptional<bool>
+//   BINARY_OPTIONAL_1/2/12  the optional/plain, plain/optional and
+//                           optional/optional binary overloads
+//   BINARY_OPTIONAL      emits all three binary overloads at once
+#define UNARY_OPTIONAL(NAME)                                                 \
+    template <class T, class B>                                              \
+    inline auto NAME(const xoptional<T, B>& e)                               \
+    {                                                                        \
+        using std::NAME;                                                     \
+        return e.has_value() ? NAME(e.value()) : missing<std::decay_t<T>>(); \
+    }
+
+#define UNARY_BOOL_OPTIONAL(NAME)                                       \
+    template <class T, class B>                                         \
+    inline xoptional<bool> NAME(const xoptional<T, B>& e)               \
+    {                                                                   \
+        using std::NAME;                                                \
+        return e.has_value() ? bool(NAME(e.value())) : missing<bool>(); \
+    }
+
+#define BINARY_OPTIONAL_1(NAME)                                                                   \
+    template <class T1, class B1, class T2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)> \
+    inline auto NAME(const xoptional<T1, B1>& e1, const T2& e2)                                   \
+        -> common_optional_t<T1, T2>                                                              \
+    {                                                                                             \
+        using std::NAME;                                                                          \
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;                \
+        return e1.has_value() ? NAME(e1.value(), e2) : missing<value_type>();                     \
+    }
+
+
+#define BINARY_OPTIONAL_2(NAME)                                                                   \
+    template <class T1, class T2, class B2, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)> \
+    inline auto NAME(const T1& e1, const xoptional<T2, B2>& e2)                                   \
+        -> common_optional_t<T1, T2>                                                              \
+    {                                                                                             \
+        using std::NAME;                                                                          \
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;                \
+        return e2.has_value() ? NAME(e1, e2.value()) : missing<value_type>();                     \
+    }
+
+#define BINARY_OPTIONAL_12(NAME)                                                                        \
+    template <class T1, class B1, class T2, class B2>                                                   \
+    inline auto NAME(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2)                          \
+    {                                                                                                   \
+        using std::NAME;                                                                                \
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>>;                      \
+        return e1.has_value() && e2.has_value() ? NAME(e1.value(), e2.value()) : missing<value_type>(); \
+    }
+
+#define BINARY_OPTIONAL(NAME) \
+    BINARY_OPTIONAL_1(NAME)   \
+    BINARY_OPTIONAL_2(NAME)   \
+    BINARY_OPTIONAL_12(NAME)
+
+// Ternary lifts (used for fma below): one overload per combination of
+// optional and plain operands, seven in total.  As in the binary case, the
+// result is missing whenever any optional operand is missing, and the value
+// type is the common type of the three decayed operand value types.
+#define TERNARY_OPTIONAL_1(NAME)                                                                                                                    \
+    template <class T1, class B1, class T2, class T3, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>, is_not_xoptional_nor_xmasked_value<T3>)> \
+    inline auto NAME(const xoptional<T1, B1>& e1, const T2& e2, const T3& e3)                                                                       \
+        -> common_optional_t<T1, T2, T3>                                                                                                            \
+    {                                                                                                                                               \
+        using std::NAME;                                                                                                                            \
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>, std::decay_t<T3>>;                                                \
+        return e1.has_value() ? NAME(e1.value(), e2, e3) : missing<value_type>();                                                                   \
+    }
+
+#define TERNARY_OPTIONAL_2(NAME)                                                                                                                    \
+    template <class T1, class T2, class B2, class T3, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>, is_not_xoptional_nor_xmasked_value<T3>)> \
+    inline auto NAME(const T1& e1, const xoptional<T2, B2>& e2, const T3& e3)                                                                       \
+        -> common_optional_t<T1, T2, T3>                                                                                                            \
+    {                                                                                                                                               \
+        using std::NAME;                                                                                                                            \
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>, std::decay_t<T3>>;                                                \
+        return e2.has_value() ? NAME(e1, e2.value(), e3) : missing<value_type>();                                                                   \
+    }
+
+#define TERNARY_OPTIONAL_3(NAME)                                                                                                                    \
+    template <class T1, class T2, class T3, class B3, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>, is_not_xoptional_nor_xmasked_value<T2>)> \
+    inline auto NAME(const T1& e1, const T2& e2, const xoptional<T3, B3>& e3)                                                                       \
+        -> common_optional_t<T1, T2, T3>                                                                                                            \
+    {                                                                                                                                               \
+        using std::NAME;                                                                                                                            \
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>, std::decay_t<T3>>;                                                \
+        return e3.has_value() ? NAME(e1, e2, e3.value()) : missing<value_type>();                                                                   \
+    }
+
+// Two optional operands: the result is present only when both are.
+#define TERNARY_OPTIONAL_12(NAME)                                                                                     \
+    template <class T1, class B1, class T2, class B2, class T3, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T3>)> \
+    inline auto NAME(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2, const T3& e3)                          \
+        -> common_optional_t<T1, T2, T3>                                                                              \
+    {                                                                                                                 \
+        using std::NAME;                                                                                              \
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>, std::decay_t<T3>>;                  \
+        return (e1.has_value() && e2.has_value()) ? NAME(e1.value(), e2.value(), e3) : missing<value_type>();         \
+    }
+
+#define TERNARY_OPTIONAL_13(NAME)                                                                                     \
+    template <class T1, class B1, class T2, class T3, class B3, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T2>)> \
+    inline auto NAME(const xoptional<T1, B1>& e1, const T2& e2, const xoptional<T3, B3>& e3)                          \
+        -> common_optional_t<T1, T2, T3>                                                                              \
+    {                                                                                                                 \
+        using std::NAME;                                                                                              \
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>, std::decay_t<T3>>;                  \
+        return (e1.has_value() && e3.has_value()) ? NAME(e1.value(), e2, e3.value()) : missing<value_type>();         \
+    }
+
+#define TERNARY_OPTIONAL_23(NAME)                                                                                     \
+    template <class T1, class T2, class B2, class T3, class B3, XTL_REQUIRES(is_not_xoptional_nor_xmasked_value<T1>)> \
+    inline auto NAME(const T1& e1, const xoptional<T2, B2>& e2, const xoptional<T3, B3>& e3)                          \
+        -> common_optional_t<T1, T2, T3>                                                                              \
+    {                                                                                                                 \
+        using std::NAME;                                                                                              \
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>, std::decay_t<T3>>;                  \
+        return (e2.has_value() && e3.has_value()) ? NAME(e1, e2.value(), e3.value()) : missing<value_type>();         \
+    }
+
+// All three operands optional.
+#define TERNARY_OPTIONAL_123(NAME)                                                                                                      \
+    template <class T1, class B1, class T2, class B2, class T3, class B3>                                                               \
+    inline auto NAME(const xoptional<T1, B1>& e1, const xoptional<T2, B2>& e2, const xoptional<T3, B3>& e3)                             \
+    {                                                                                                                                   \
+        using std::NAME;                                                                                                                \
+        using value_type = std::common_type_t<std::decay_t<T1>, std::decay_t<T2>, std::decay_t<T3>>;                                    \
+        return (e1.has_value() && e2.has_value() && e3.has_value()) ? NAME(e1.value(), e2.value(), e3.value()) : missing<value_type>(); \
+    }
+
+// Convenience: emit all seven ternary overloads for NAME.
+#define TERNARY_OPTIONAL(NAME) \
+    TERNARY_OPTIONAL_1(NAME)   \
+    TERNARY_OPTIONAL_2(NAME)   \
+    TERNARY_OPTIONAL_3(NAME)   \
+    TERNARY_OPTIONAL_12(NAME)  \
+    TERNARY_OPTIONAL_13(NAME)  \
+    TERNARY_OPTIONAL_23(NAME)  \
+    TERNARY_OPTIONAL_123(NAME)
+
+    // Instantiate the optional-aware overloads for the standard <cmath>
+    // function set (arithmetic, exponential, power, trigonometric, hyperbolic,
+    // error/gamma, rounding and classification functions).
+    UNARY_OPTIONAL(abs)
+    UNARY_OPTIONAL(fabs)
+    BINARY_OPTIONAL(fmod)
+    BINARY_OPTIONAL(remainder)
+    TERNARY_OPTIONAL(fma)
+    BINARY_OPTIONAL(fmax)
+    BINARY_OPTIONAL(fmin)
+    BINARY_OPTIONAL(fdim)
+    UNARY_OPTIONAL(exp)
+    UNARY_OPTIONAL(exp2)
+    UNARY_OPTIONAL(expm1)
+    UNARY_OPTIONAL(log)
+    UNARY_OPTIONAL(log10)
+    UNARY_OPTIONAL(log2)
+    UNARY_OPTIONAL(log1p)
+    BINARY_OPTIONAL(pow)
+    UNARY_OPTIONAL(sqrt)
+    UNARY_OPTIONAL(cbrt)
+    BINARY_OPTIONAL(hypot)
+    UNARY_OPTIONAL(sin)
+    UNARY_OPTIONAL(cos)
+    UNARY_OPTIONAL(tan)
+    UNARY_OPTIONAL(acos)
+    UNARY_OPTIONAL(asin)
+    UNARY_OPTIONAL(atan)
+    BINARY_OPTIONAL(atan2)
+    UNARY_OPTIONAL(sinh)
+    UNARY_OPTIONAL(cosh)
+    UNARY_OPTIONAL(tanh)
+    UNARY_OPTIONAL(acosh)
+    UNARY_OPTIONAL(asinh)
+    UNARY_OPTIONAL(atanh)
+    UNARY_OPTIONAL(erf)
+    UNARY_OPTIONAL(erfc)
+    UNARY_OPTIONAL(tgamma)
+    UNARY_OPTIONAL(lgamma)
+    UNARY_OPTIONAL(ceil)
+    UNARY_OPTIONAL(floor)
+    UNARY_OPTIONAL(trunc)
+    UNARY_OPTIONAL(round)
+    UNARY_OPTIONAL(nearbyint)
+    UNARY_OPTIONAL(rint)
+    UNARY_BOOL_OPTIONAL(isfinite)
+    UNARY_BOOL_OPTIONAL(isinf)
+    UNARY_BOOL_OPTIONAL(isnan)
+
+// The helper macros are implementation details of this header; scrub them so
+// they do not leak into client translation units.
+#undef TERNARY_OPTIONAL
+#undef TERNARY_OPTIONAL_123
+#undef TERNARY_OPTIONAL_23
+#undef TERNARY_OPTIONAL_13
+#undef TERNARY_OPTIONAL_12
+#undef TERNARY_OPTIONAL_3
+#undef TERNARY_OPTIONAL_2
+#undef TERNARY_OPTIONAL_1
+#undef BINARY_OPTIONAL
+#undef BINARY_OPTIONAL_12
+#undef BINARY_OPTIONAL_2
+#undef BINARY_OPTIONAL_1
+#undef UNARY_OPTIONAL
+
+    /*************************
+     * select implementation *
+     *************************/
+
+    template <class B, class T1, class T2, XTL_REQUIRES(at_least_one_xoptional<B, T1, T2>)>
+    inline common_optional_t<T1, T2> select(const B& cond, const T1& v1, const T2& v2) noexcept
+    {
+        using bool_type = common_optional_t<B>;
+        using return_type = common_optional_t<T1, T2>;
+        bool_type opt_cond(cond);
+        return opt_cond.has_value() ?
+            opt_cond.value() ? return_type(v1) : return_type(v2) :
+            missing<typename return_type::value_type>();
+    }
+}
+
+#endif

+ 141 - 0
3rd/numpy/include/xtl/xoptional_meta.hpp

@@ -0,0 +1,141 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
+* Martin Renou                                                             *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_OPTIONAL_META_HPP
+#define XTL_OPTIONAL_META_HPP
+
+#include <type_traits>
+
+#include "xmasked_value_meta.hpp"
+#include "xmeta_utils.hpp"
+#include "xtype_traits.hpp"
+
+// Metaprogramming support for xoptional: detection traits, converting
+// constructor/assignment guards, and computation of the common optional
+// result type used by the lifted operators and math functions.
+namespace xtl
+{
+    template <class CT, class CB = bool>
+    class xoptional;
+
+    namespace detail
+    {
+        // Detects specializations of xoptional (false for everything else).
+        template <class E>
+        struct is_xoptional_impl : std::false_type
+        {
+        };
+
+        template <class CT, class CB>
+        struct is_xoptional_impl<xoptional<CT, CB>> : std::true_type
+        {
+        };
+
+        // True when CT can be constructed or implicitly converted from any
+        // value category of xoptional<CTO, CBO>.  Used to constrain the
+        // converting constructors (cf. the analogous converts_from_optional
+        // constraint in the std::optional specification).
+        template <class CT, class CTO, class CBO>
+        using converts_from_xoptional = disjunction<
+            std::is_constructible<CT, const xoptional<CTO, CBO>&>,
+            std::is_constructible<CT, xoptional<CTO, CBO>&>,
+            std::is_constructible<CT, const xoptional<CTO, CBO>&&>,
+            std::is_constructible<CT, xoptional<CTO, CBO>&&>,
+            std::is_convertible<const xoptional<CTO, CBO>&, CT>,
+            std::is_convertible<xoptional<CTO, CBO>&, CT>,
+            std::is_convertible<const xoptional<CTO, CBO>&&, CT>,
+            std::is_convertible<xoptional<CTO, CBO>&&, CT>
+        >;
+
+        // Companion guard for the converting assignment operators.
+        template <class CT, class CTO, class CBO>
+        using assigns_from_xoptional = disjunction<
+            std::is_assignable<std::add_lvalue_reference_t<CT>, const xoptional<CTO, CBO>&>,
+            std::is_assignable<std::add_lvalue_reference_t<CT>, xoptional<CTO, CBO>&>,
+            std::is_assignable<std::add_lvalue_reference_t<CT>, const xoptional<CTO, CBO>&&>,
+            std::is_assignable<std::add_lvalue_reference_t<CT>, xoptional<CTO, CBO>&&>
+        >;
+
+        // Computes the xoptional type able to hold the common value type of
+        // the given arguments (optionals are first stripped to their value
+        // types, plain types are used as-is).
+        template <class... Args>
+        struct common_optional_impl;
+
+        // Single argument: wrap non-optionals in xoptional, keep optionals.
+        template <class T>
+        struct common_optional_impl<T>
+        {
+            using type = std::conditional_t<is_xoptional_impl<T>::value, T, xoptional<T>>;
+        };
+
+        // Lazy metafunction returning T itself; used as the "fundamental
+        // type" branch of eval_if below.
+        template <class T>
+        struct identity
+        {
+            using type = T;
+        };
+
+        // Lazy metafunction extracting T::value_type; only instantiated for
+        // the non-fundamental branch of eval_if.
+        template <class T>
+        struct get_value_type
+        {
+            using type = typename T::value_type;
+        };
+
+        // Two plain arguments: fundamental types contribute themselves,
+        // class types contribute their value_type; the result is the
+        // xoptional of their std::common_type.
+        template<class T1, class T2>
+        struct common_optional_impl<T1, T2>
+        {
+            using decay_t1 = std::decay_t<T1>;
+            using decay_t2 = std::decay_t<T2>;
+            using type1 = xtl::mpl::eval_if_t<xtl::is_fundamental<decay_t1>, identity<decay_t1>, get_value_type<decay_t1>>;
+            using type2 = xtl::mpl::eval_if_t<xtl::is_fundamental<decay_t2>, identity<decay_t2>, get_value_type<decay_t2>>;
+            using type = xoptional<std::common_type_t<type1, type2>>;
+        };
+
+        // The next three specializations strip the xoptional wrapper from
+        // either (or both) arguments before delegating to the plain case.
+        template <class T1, class T2, class B2>
+        struct common_optional_impl<T1, xoptional<T2, B2>>
+            : common_optional_impl<T1, T2>
+        {
+        };
+
+        template <class T1, class B1, class T2>
+        struct common_optional_impl<xoptional<T1, B1>, T2>
+            : common_optional_impl<T1, T2>
+        {
+        };
+
+        template <class T1, class B1, class T2, class B2>
+        struct common_optional_impl<xoptional<T1, B1>, xoptional<T2, B2>>
+            : common_optional_impl<T1, T2>
+        {
+        };
+
+        // Variadic case: left fold over the binary combination.
+        template <class T1, class T2, class... Args>
+        struct common_optional_impl<T1, T2, Args...>
+        {
+            using type = typename common_optional_impl<
+                             typename common_optional_impl<T1, T2>::type,
+                             Args...
+                         >::type;
+        };
+    }
+
+    // Public detection trait for xoptional specializations.
+    template <class E>
+    using is_xoptional = detail::is_xoptional_impl<E>;
+
+    // SFINAE helper disabling an overload when E is an xoptional.
+    template <class E, class R = void>
+    using disable_xoptional = std::enable_if_t<!is_xoptional<E>::value, R>;
+
+    // True when at least one of Args is an xoptional (used e.g. by select).
+    template <class... Args>
+    struct at_least_one_xoptional : disjunction<is_xoptional<Args>...>
+    {
+    };
+
+    // Common optional result type of a pack of (possibly optional) types.
+    template <class... Args>
+    struct common_optional : detail::common_optional_impl<Args...>
+    {
+    };
+
+    template <class... Args>
+    using common_optional_t = typename common_optional<Args...>::type;
+
+    // True when E is neither an xoptional nor an xmasked_value; used to
+    // constrain the "plain operand" overloads of the lifted operators.
+    template <class E>
+    struct is_not_xoptional_nor_xmasked_value : negation<disjunction<is_xoptional<E>, is_xmasked_value<E>>>
+    {
+    };
+}
+
+#endif

+ 622 - 0
3rd/numpy/include/xtl/xoptional_sequence.hpp

@@ -0,0 +1,622 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_OPTIONAL_SEQUENCE_HPP
+#define XTL_OPTIONAL_SEQUENCE_HPP
+
+#include <array>
+#include <bitset>
+#include <cstddef>
+#include <iterator>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "xdynamic_bitset.hpp"
+#include "xiterator_base.hpp"
+#include "xoptional.hpp"
+#include "xsequence.hpp"
+
+namespace xtl
+{
+    /**************************************
+     * Optimized 1-D xoptional containers *
+     **************************************/
+
+    template <class ITV, class ITB>
+    class xoptional_iterator;
+
+    // Base class for the optimized 1-D optional containers below.  Instead of
+    // storing a container of xoptional objects, the values live in a value
+    // container (BC) and the presence flags in a separate flag container (FC)
+    // — a structure-of-arrays layout.  Element access returns lightweight
+    // xoptional proxies built from a value reference and a flag reference.
+    template <class BC, class FC>
+    class xoptional_sequence
+    {
+    public:
+
+        // Internal typedefs
+
+        using base_container_type = BC;
+        using base_value_type = typename base_container_type::value_type;
+        using base_reference = typename base_container_type::reference;
+        using base_const_reference = typename base_container_type::const_reference;
+
+        using flag_container_type = FC;
+        using flag_type = typename flag_container_type::value_type;
+        using flag_reference = typename flag_container_type::reference;
+        using flag_const_reference = typename flag_container_type::const_reference;
+
+        // Container typedefs
+        // reference/const_reference are proxy xoptional objects referring to
+        // one slot of each underlying container; pointer wraps the proxy in
+        // an xclosure_pointer so that operator-> works on temporaries.
+        using value_type = xoptional<base_value_type, flag_type>;
+        using reference = xoptional<base_reference, flag_reference>;
+        using const_reference = xoptional<base_const_reference, flag_const_reference>;
+        using pointer = xclosure_pointer<reference>;
+        using const_pointer = xclosure_pointer<const_reference>;
+
+        // Other typedefs
+        using size_type = typename base_container_type::size_type;
+        using difference_type = typename base_container_type::difference_type;
+        using iterator = xoptional_iterator<typename base_container_type::iterator,
+                                            typename flag_container_type::iterator>;
+        using const_iterator = xoptional_iterator<typename base_container_type::const_iterator,
+                                                  typename flag_container_type::const_iterator>;
+
+        using reverse_iterator = xoptional_iterator<typename base_container_type::reverse_iterator,
+                                                    typename flag_container_type::reverse_iterator>;
+        using const_reverse_iterator = xoptional_iterator<typename base_container_type::const_reverse_iterator,
+                                                          typename flag_container_type::const_reverse_iterator>;
+
+        // Size queries, forwarded to the value container.
+        bool empty() const noexcept;
+        size_type size() const noexcept;
+        size_type max_size() const noexcept;
+
+        // Element access; each accessor pairs the value slot with its flag.
+        reference at(size_type i);
+        const_reference at(size_type i) const;
+
+        reference operator[](size_type i);
+        const_reference operator[](size_type i) const;
+
+        reference front();
+        const_reference front() const;
+
+        reference back();
+        const_reference back() const;
+
+        // Iterators pairing a value iterator with a flag iterator.
+        iterator begin() noexcept;
+        iterator end() noexcept;
+
+        const_iterator begin() const noexcept;
+        const_iterator end() const noexcept;
+        const_iterator cbegin() const noexcept;
+        const_iterator cend() const noexcept;
+
+        reverse_iterator rbegin() noexcept;
+        reverse_iterator rend() noexcept;
+
+        const_reverse_iterator rbegin() const noexcept;
+        const_reverse_iterator rend() const noexcept;
+        const_reverse_iterator crbegin() const noexcept;
+        const_reverse_iterator crend() const noexcept;
+
+        // Ref-qualified access to the underlying value / flag containers.
+        base_container_type value() && noexcept;
+        base_container_type& value() & noexcept;
+        const base_container_type& value() const & noexcept;
+
+        flag_container_type has_value() && noexcept;
+        flag_container_type& has_value() & noexcept;
+        const flag_container_type& has_value() const & noexcept;
+
+    protected:
+
+        // Construction/destruction is protected: only usable as a base of
+        // the concrete sequence types (xoptional_array, xoptional_vector).
+        xoptional_sequence() = default;
+        xoptional_sequence(size_type s, const base_value_type& v);
+        template <class CTO, class CBO>
+        xoptional_sequence(size_type s, const xoptional<CTO, CBO>& v);
+
+        ~xoptional_sequence() = default;
+
+        xoptional_sequence(const xoptional_sequence&) = default;
+        xoptional_sequence& operator=(const xoptional_sequence&) = default;
+
+        xoptional_sequence(xoptional_sequence&&) = default;
+        xoptional_sequence& operator=(xoptional_sequence&&) = default;
+
+        base_container_type m_values;   // element values
+        flag_container_type m_flags;    // presence flag per element
+    };
+
+    // Comparison operators on the (values, flags) pair; see the definitions
+    // at the end of this file for the exact semantics.
+    template <class BC, class FC>
+    bool operator==(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs);
+
+    template <class BC, class FC>
+    bool operator!=(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs);
+
+    template <class BC, class FC>
+    bool operator<(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs);
+
+    template <class BC, class FC>
+    bool operator<=(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs);
+
+    template <class BC, class FC>
+    bool operator>(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs);
+
+    template <class BC, class FC>
+    bool operator>=(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs);
+
+    /********************************
+     * xoptional_array declarations *
+     ********************************/
+
+    // There is no value_type in std::bitset ...
+    // Fixed-size optional sequence: values in std::array<T, I>, presence
+    // flags in an xdynamic_bitset by default (see the note above about
+    // std::bitset lacking value_type).
+    template <class T, std::size_t I, class BC = xdynamic_bitset<std::size_t>>
+    class xoptional_array : public xoptional_sequence<std::array<T, I>, BC>
+    {
+    public:
+
+        using self_type = xoptional_array;
+        using base_container_type = std::array<T, I>;
+        using flag_container_type = BC;
+        using base_type = xoptional_sequence<base_container_type, flag_container_type>;
+        using base_value_type = typename base_type::base_value_type;
+        using size_type = typename base_type::size_type;
+
+        // Constructors forward to the protected base constructors, filling
+        // the sequence with a plain value (all present) or an xoptional.
+        xoptional_array() = default;
+        xoptional_array(size_type s, const base_value_type& v);
+
+        template <class CTO, class CBO>
+        xoptional_array(size_type s, const xoptional<CTO, CBO>& v);
+    };
+
+    /********************
+     * xoptional_vector *
+     ********************/
+
+    // Resizable optional sequence backed by std::vector<T, A>; adds resize
+    // overloads that grow/shrink the value and flag containers in lockstep.
+    template <class T, class A = std::allocator<T>, class BC = xdynamic_bitset<std::size_t>>
+    class xoptional_vector : public xoptional_sequence<std::vector<T, A>, BC>
+    {
+    public:
+
+        using self_type = xoptional_vector;
+        using base_container_type = std::vector<T, A>;
+        using flag_container_type = BC;
+        using base_type = xoptional_sequence<base_container_type, flag_container_type>;
+        using base_value_type = typename base_type::base_value_type;
+        using allocator_type = A;
+
+        using value_type = typename base_type::value_type;
+        using size_type = typename base_type::size_type;
+        using difference_type = typename base_type::difference_type;
+        using reference = typename base_type::reference;
+        using const_reference = typename base_type::const_reference;
+        using pointer = typename base_type::pointer;
+        using const_pointer = typename base_type::const_pointer;
+
+        using iterator = typename base_type::iterator;
+        using const_iterator = typename base_type::const_iterator;
+        using reverse_iterator = typename base_type::reverse_iterator;
+        using const_reverse_iterator = typename base_type::const_reverse_iterator;
+
+        xoptional_vector() = default;
+        xoptional_vector(size_type, const base_value_type&);
+
+        template <class CTO, class CBO>
+        xoptional_vector(size_type, const xoptional<CTO, CBO>&);
+
+        // Resize with a plain fill value or with an xoptional fill value.
+        void resize(size_type);
+        void resize(size_type, const base_value_type&);
+        template <class CTO, class CBO>
+        void resize(size_type, const xoptional<CTO, CBO>&);
+    };
+
+    /**********************************
+     * xoptional_iterator declaration *
+     **********************************/
+
+    // Iterator traits for xoptional_iterator: reference is a proxy xoptional
+    // built from the two underlying references, so pointer is an
+    // xclosure_pointer (operator-> must work on a temporary proxy).
+    template <class ITV, class ITB>
+    struct xoptional_iterator_traits
+    {
+        using iterator_type = xoptional_iterator<ITV, ITB>;
+        using value_type = xoptional<typename ITV::value_type, typename ITB::value_type>;
+        using reference = xoptional<typename ITV::reference, typename ITB::reference>;
+        using pointer = xclosure_pointer<reference>;
+        using difference_type = typename ITV::difference_type;
+    };
+
+    // Random-access iterator pairing a value iterator (ITV) and a flag
+    // iterator (ITB), advanced together; dereferencing yields an xoptional
+    // proxy reference.  Only ++/--/+=/-=/difference/*/->/==/< are defined
+    // here; the remaining operators are synthesized by the
+    // xrandom_access_iterator_base2 CRTP base.
+    template <class ITV, class ITB>
+    class xoptional_iterator : public xrandom_access_iterator_base2<xoptional_iterator_traits<ITV, ITB>>
+    {
+    public:
+
+        using self_type = xoptional_iterator<ITV, ITB>;
+        using base_type = xrandom_access_iterator_base2<xoptional_iterator_traits<ITV, ITB>>;
+
+        using value_type = typename base_type::value_type;
+        using reference = typename base_type::reference;
+        using pointer = typename base_type::pointer;
+        using difference_type = typename base_type::difference_type;
+
+        xoptional_iterator() = default;
+        xoptional_iterator(ITV itv, ITB itb);
+
+        self_type& operator++();
+        self_type& operator--();
+
+        self_type& operator+=(difference_type n);
+        self_type& operator-=(difference_type n);
+
+        difference_type operator-(const self_type& rhs) const;
+
+        reference operator*() const;
+        pointer operator->() const;
+
+        bool operator==(const self_type& rhs) const;
+        bool operator<(const self_type& rhs) const;
+
+    private:
+
+        ITV m_itv;   // position in the value container
+        ITB m_itb;   // position in the flag container
+    };
+
+    /*************************************
+     * xoptional_sequence implementation *
+     *************************************/
+
+    template <class BC, class FC>
+    inline xoptional_sequence<BC, FC>::xoptional_sequence(size_type s, const base_value_type& v)
+        : m_values(make_sequence<base_container_type>(s, v)),
+          m_flags(make_sequence<flag_container_type>(s, true))
+    {
+    }
+
+    template <class BC, class FC>
+    template <class CTO, class CBO>
+    inline xoptional_sequence<BC, FC>::xoptional_sequence(size_type s, const xoptional<CTO, CBO>& v)
+        : m_values(make_sequence<base_container_type>(s, v.value())), m_flags(make_sequence<flag_container_type>(s, v.has_value()))
+    {
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::empty() const noexcept -> bool
+    {
+        return m_values.empty();
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::size() const noexcept -> size_type
+    {
+        return m_values.size();
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::max_size() const noexcept -> size_type
+    {
+        return m_values.max_size();
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::at(size_type i) -> reference
+    {
+        return reference(m_values.at(i), m_flags.at(i));
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::at(size_type i) const -> const_reference
+    {
+        return const_reference(m_values.at(i), m_flags.at(i));
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::operator[](size_type i) -> reference
+    {
+        return reference(m_values[i], m_flags[i]);
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::operator[](size_type i) const -> const_reference
+    {
+        return const_reference(m_values[i], m_flags[i]);
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::front() -> reference
+    {
+        return reference(m_values.front(), m_flags.front());
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::front() const -> const_reference
+    {
+        return const_reference(m_values.front(), m_flags.front());
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::back() -> reference
+    {
+        return reference(m_values.back(), m_flags.back());
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::back() const -> const_reference
+    {
+        return const_reference(m_values.back(), m_flags.back());
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::begin() noexcept -> iterator
+    {
+        return iterator(m_values.begin(), m_flags.begin());
+    }
+
+    // Mutable end(): pairs the value-container and flag-container iterators
+    // so both advance in lockstep.
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::end() noexcept -> iterator
+    {
+        return iterator(m_values.end(), m_flags.end());
+    }
+
+    // Const overloads simply delegate to their c-prefixed counterparts.
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::begin() const noexcept -> const_iterator
+    {
+        return cbegin();
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::end() const noexcept -> const_iterator
+    {
+        return cend();
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::cbegin() const noexcept -> const_iterator
+    {
+        return const_iterator(m_values.cbegin(), m_flags.cbegin());
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::cend() const noexcept -> const_iterator
+    {
+        return const_iterator(m_values.cend(), m_flags.cend());
+    }
+
+    // Reverse iterators follow the same pairing scheme as the forward ones.
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::rbegin() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(m_values.rbegin(), m_flags.rbegin());
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::rend() noexcept -> reverse_iterator
+    {
+        return reverse_iterator(m_values.rend(), m_flags.rend());
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::rbegin() const noexcept -> const_reverse_iterator
+    {
+        return crbegin();
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::rend() const noexcept -> const_reverse_iterator
+    {
+        return crend();
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::crbegin() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(m_values.crbegin(), m_flags.crbegin());
+    }
+
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::crend() const noexcept -> const_reverse_iterator
+    {
+        return const_reverse_iterator(m_values.crend(), m_flags.crend());
+    }
+
+    // Rvalue overload: the sequence is expiring, so move the value container
+    // out instead of returning a copy (the original copied m_values).
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::value() && noexcept -> base_container_type
+    {
+        return std::move(m_values);
+    }
+
+    // Lvalue overload: mutable reference to the underlying value container.
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::value() & noexcept -> base_container_type&
+    {
+        return m_values;
+    }
+
+    // Const overload: read-only access to the underlying value container.
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::value() const & noexcept -> const base_container_type&
+    {
+        return m_values;
+    }
+
+    // Rvalue overload: move the flag container out instead of copying it
+    // (the original copied m_flags; also fixes the missing space in
+    // "noexcept->").
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::has_value() && noexcept -> flag_container_type
+    {
+        return std::move(m_flags);
+    }
+
+    // Lvalue overload: mutable reference to the missing-value mask.
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::has_value() & noexcept -> flag_container_type&
+    {
+        return m_flags;
+    }
+
+    // Const overload: read-only access to the missing-value mask.
+    template <class BC, class FC>
+    inline auto xoptional_sequence<BC, FC>::has_value() const & noexcept -> const flag_container_type&
+    {
+        return m_flags;
+    }
+
+    // Two sequences are equal when both the values and the flag masks match.
+    template <class BC, class FC>
+    inline bool operator==(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs)
+    {
+        return lhs.value() == rhs.value() && lhs.has_value() == rhs.has_value();
+    }
+
+    template <class BC, class FC>
+    inline bool operator!=(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+    // NOTE(review): the ordering operators below only yield true when the
+    // flag containers compare equal, so this is not a total order across
+    // sequences with differing missing-value masks — confirm that this is
+    // the intended semantics before using these with sorted containers.
+    template <class BC, class FC>
+    inline bool operator<(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs)
+    {
+        return lhs.value() < rhs.value() && lhs.has_value() == rhs.has_value();
+    }
+
+    template <class BC, class FC>
+    inline bool operator<=(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs)
+    {
+        return lhs.value() <= rhs.value() && lhs.has_value() == rhs.has_value();
+    }
+
+    template <class BC, class FC>
+    inline bool operator>(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs)
+    {
+        return lhs.value() > rhs.value() && lhs.has_value() == rhs.has_value();
+    }
+
+    template <class BC, class FC>
+    inline bool operator>=(const xoptional_sequence<BC, FC>& lhs, const xoptional_sequence<BC, FC>& rhs)
+    {
+        return lhs.value() >= rhs.value() && lhs.has_value() == rhs.has_value();
+    }
+
+    /**********************************
+     * xoptional_array implementation *
+     **********************************/
+
+    // Construct an array of s copies of the (present) value v; forwards to
+    // the base sequence, which marks every element as having a value.
+    template <class T, std::size_t I, class BC>
+    xoptional_array<T, I, BC>::xoptional_array(size_type s, const base_value_type& v)
+        : base_type(s, v)
+    {
+    }
+
+    // Construct from an xoptional initializer; the base sequence splits it
+    // into its value and has_value parts.
+    template <class T, std::size_t I, class BC>
+    template <class CTO, class CBO>
+    xoptional_array<T, I, BC>::xoptional_array(size_type s, const xoptional<CTO, CBO>& v)
+        : base_type(s, v)
+    {
+    }
+
+    /***********************************
+     * xoptional_vector implementation *
+     ***********************************/
+
+    // Construct a vector of s copies of the (present) value v.
+    template <class T, class A, class BC>
+    xoptional_vector<T, A, BC>::xoptional_vector(size_type s, const base_value_type& v)
+        : base_type(s, v)
+    {
+    }
+
+    // Construct from an xoptional initializer (value + has_value flag).
+    template <class T, class A, class BC>
+    template <class CTO, class CBO>
+    xoptional_vector<T, A, BC>::xoptional_vector(size_type s, const xoptional<CTO, CBO>& v)
+        : base_type(s, v)
+    {
+    }
+
+    // Resize without an initializer: newly added elements are flagged as
+    // missing (flags resized with false).
+    template <class T, class A, class BC>
+    void xoptional_vector<T, A, BC>::resize(size_type s)
+    {
+        // Default to missing
+        this->m_values.resize(s);
+        this->m_flags.resize(s, false);
+    }
+
+    // Resize with a plain value: newly added elements are present (true).
+    template <class T, class A, class BC>
+    void xoptional_vector<T, A, BC>::resize(size_type s, const base_value_type& v)
+    {
+        this->m_values.resize(s, v);
+        this->m_flags.resize(s, true);
+    }
+
+    // Resize with an xoptional: new elements copy both its value and its
+    // presence flag.
+    template <class T, class A, class BC>
+    template <class CTO, class CBO>
+    void xoptional_vector<T, A, BC>::resize(size_type s, const xoptional<CTO, CBO>& v)
+    {
+        this->m_values.resize(s, v.value());
+        this->m_flags.resize(s, v.has_value());
+    }
+
+    /*************************************
+     * xoptional_iterator implementation *
+     *************************************/
+
+    // An xoptional_iterator keeps a value iterator (ITV) and a flag iterator
+    // (ITB) that are always moved together, in lockstep.
+    template <class ITV, class ITB>
+    xoptional_iterator<ITV, ITB>::xoptional_iterator(ITV itv, ITB itb)
+        : m_itv(itv), m_itb(itb)
+    {
+    }
+
+    template <class ITV, class ITB>
+    auto xoptional_iterator<ITV, ITB>::operator++() -> self_type&
+    {
+        ++m_itv;
+        ++m_itb;
+        return *this;
+    }
+
+    template <class ITV, class ITB>
+    auto xoptional_iterator<ITV, ITB>::operator--() -> self_type&
+    {
+        --m_itv;
+        --m_itb;
+        return *this;
+    }
+
+    template <class ITV, class ITB>
+    auto xoptional_iterator<ITV, ITB>::operator+=(difference_type n) -> self_type&
+    {
+        m_itv += n;
+        m_itb += n;
+        return *this;
+    }
+
+    template <class ITV, class ITB>
+    auto xoptional_iterator<ITV, ITB>::operator-=(difference_type n) -> self_type&
+    {
+        m_itv -= n;
+        m_itb -= n;
+        return *this;
+    }
+
+    // Distance is measured on the value iterator only; the flag iterator
+    // moves in lockstep so its distance is identical by construction.
+    template <class ITV, class ITB>
+    auto xoptional_iterator<ITV, ITB>::operator-(const self_type& rhs) const -> difference_type
+    {
+        return m_itv - rhs.m_itv;
+    }
+
+    // Dereference yields a proxy reference pairing the value and its flag.
+    template <class ITV, class ITB>
+    auto xoptional_iterator<ITV, ITB>::operator*() const -> reference
+    {
+        return reference(*m_itv, *m_itb);
+    }
+
+    template <class ITV, class ITB>
+    auto xoptional_iterator<ITV, ITB>::operator-> () const -> pointer
+    {
+        return pointer(operator*());
+    }
+
+    template <class ITV, class ITB>
+    bool xoptional_iterator<ITV, ITB>::operator==(const self_type& rhs) const
+    {
+        return m_itv == rhs.m_itv && m_itb == rhs.m_itb;
+    }
+
+    // NOTE(review): requires both underlying iterators to compare less; for
+    // lockstep iterators into containers of equal length this is equivalent
+    // to comparing the value iterators alone.
+    template <class ITV, class ITB>
+    bool xoptional_iterator<ITV, ITB>::operator<(const self_type& rhs) const
+    {
+        return m_itv < rhs.m_itv && m_itb < rhs.m_itb;
+    }
+}
+
+#endif

+ 42 - 0
3rd/numpy/include/xtl/xplatform.hpp

@@ -0,0 +1,42 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XPLATFORM_HPP
+#define XTL_XPLATFORM_HPP
+
+#include <cstring>
+#include <cstdint>
+
+namespace xtl
+{
+    // Byte-order categories that endianness() can report.
+    enum class endian
+    {
+        big_endian,
+        little_endian,
+        mixed
+    };
+
+    // Detect the native byte order at run time by writing a known 32-bit
+    // pattern and inspecting which byte lands first in memory.
+    inline endian endianness()
+    {
+        const uint32_t probe = 0x01020304;
+        char bytes[sizeof(probe)];
+        std::memcpy(bytes, &probe, sizeof(probe));
+        if (bytes[0] == 0x01)
+        {
+            return endian::big_endian;
+        }
+        if (bytes[0] == 0x04)
+        {
+            return endian::little_endian;
+        }
+        return endian::mixed;
+    }
+}
+
+#endif

+ 48 - 0
3rd/numpy/include/xtl/xproxy_wrapper.hpp

@@ -0,0 +1,48 @@
+/***************************************************************************
+* Copyright (c) 2016, Johan Mabille, Sylvain Corlay and Wolf Vollprecht    *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XPROXY_WRAPPER_HPP
+#define XTL_XPROXY_WRAPPER_HPP
+
+#include "xclosure.hpp"
+
+namespace xtl
+{
+
+    // Wraps a class-type proxy and overloads unary operator& so that taking
+    // the address of a proxy yields a closure pointer holding the proxy
+    // itself, rather than the address of a temporary.
+    template <class P>
+    class xproxy_wrapper_impl : public P
+    {
+    public:
+
+        using self_type = xproxy_wrapper_impl<P>;
+        using lv_pointer = xclosure_pointer<P&>;
+        using rv_pointer = xclosure_pointer<P>;
+
+        explicit xproxy_wrapper_impl(P&& rhs)
+            : P(std::move(rhs))
+        {
+        }
+
+        // Lvalue: pointer wraps a reference; rvalue: pointer owns a copy
+        // moved out of *this.
+        inline lv_pointer operator&() & { return lv_pointer(*this); }
+        inline rv_pointer operator&() && { return rv_pointer(std::move(*this)); }
+    };
+
+    // Non-class proxies (references, scalars) cannot be derived from, so
+    // they fall back to xclosure_wrapper.
+    template <class P>
+    using xproxy_wrapper = std::conditional_t<std::is_class<P>::value,
+                                              xproxy_wrapper_impl<P>,
+                                              xclosure_wrapper<P>>;
+
+    // Factory: wraps a proxy, preserving its value category.
+    template <class P>
+    inline xproxy_wrapper<P> proxy_wrapper(P&& proxy)
+    {
+        return xproxy_wrapper<P>(std::forward<P>(proxy));
+    }
+}
+
+#endif
+

+ 215 - 0
3rd/numpy/include/xtl/xsequence.hpp

@@ -0,0 +1,215 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_SEQUENCE_HPP
+#define XTL_SEQUENCE_HPP
+
+#include <array>
+#include <cstddef>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "xtl_config.hpp"
+#include "xmeta_utils.hpp"
+
+namespace xtl
+{
+    template <class S>
+    S make_sequence(typename S::size_type size);
+
+    template <class S>
+    S make_sequence(typename S::size_type size, typename S::value_type v);
+
+    template <class S>
+    S make_sequence(std::initializer_list<typename S::value_type> init);
+
+    template <class R, class A>
+    decltype(auto) forward_sequence(A&& s);
+
+    // equivalent to std::size(c) in c++17
+    template <class C>
+    constexpr auto sequence_size(const C& c) -> decltype(c.size());
+
+    // equivalent to std::size(a) in c++17
+    template <class T, std::size_t N>
+    constexpr std::size_t sequence_size(const T (&a)[N]);
+
+    /********************************
+     * make_sequence implementation *
+     ********************************/
+
+    namespace detail
+    {
+        // Generic builder: assumes S has (size), (size, value) and
+        // initializer_list constructors (e.g. std::vector).
+        template <class S>
+        struct sequence_builder
+        {
+            using value_type = typename S::value_type;
+            using size_type = typename S::size_type;
+
+            inline static S make(size_type size)
+            {
+                return S(size);
+            }
+
+            inline static S make(size_type size, value_type v)
+            {
+                return S(size, v);
+            }
+
+            inline static S make(std::initializer_list<value_type> init)
+            {
+                return S(init);
+            }
+        };
+
+        // Specialization for std::array: the size is fixed at compile time,
+        // so the runtime size argument is ignored.
+        template <class T, std::size_t N>
+        struct sequence_builder<std::array<T, N>>
+        {
+            using sequence_type = std::array<T, N>;
+            using value_type = typename sequence_type::value_type;
+            using size_type = typename sequence_type::size_type;
+
+            // Value-initializes the array (zeroes trivial element types).
+            inline static sequence_type make(size_type /*size*/)
+            {
+                return sequence_type();
+            }
+
+            inline static sequence_type make(size_type /*size*/, value_type v)
+            {
+                sequence_type s;
+                s.fill(v);
+                return s;
+            }
+
+            // NOTE(review): assumes init.size() <= N; a longer initializer
+            // would write past the array — TODO confirm callers guarantee it.
+            inline static sequence_type make(std::initializer_list<value_type> init)
+            {
+                sequence_type s;
+                std::copy(init.begin(), init.end(), s.begin());
+                return s;
+            }
+        };
+    }
+
+    // Build a sequence of the given size; dispatches to the builder so that
+    // fixed-size containers (std::array) are handled uniformly.
+    template <class S>
+    inline S make_sequence(typename S::size_type size)
+    {
+        return detail::sequence_builder<S>::make(size);
+    }
+
+    // Build a sequence of the given size filled with v.
+    template <class S>
+    inline S make_sequence(typename S::size_type size, typename S::value_type v)
+    {
+        return detail::sequence_builder<S>::make(size, v);
+    }
+
+    // Build a sequence from an initializer list.
+    template <class S>
+    inline S make_sequence(std::initializer_list<typename S::value_type> init)
+    {
+        return detail::sequence_builder<S>::make(init);
+    }
+
+    /***********************************
+     * forward_sequence implementation *
+     ***********************************/
+
+    namespace detail
+    {
+        // Fallback: R is not resizable (e.g. std::array), so element-copy
+        // into a default-constructed R.
+        // NOTE(review): assumes the source has at least as many elements as
+        // R holds — confirm callers guarantee matching sizes.
+        template <class R, class A, class E = void>
+        struct sequence_forwarder_impl
+        {
+            template <class T>
+            static inline R forward(const T& r)
+            {
+                R ret;
+                std::copy(std::begin(r), std::end(r), std::begin(ret));
+                return ret;
+            }
+        };
+
+        // SFINAE: if R has resize(size_t), it is range-constructible here,
+        // so build R directly from the source's iterator range.
+        template <class R, class A>
+        struct sequence_forwarder_impl<R, A, void_t<decltype(std::declval<R>().resize(
+              std::declval<std::size_t>()))>>
+        {
+            template <class T>
+            static inline auto forward(const T& r)
+            {
+                return R(std::begin(r), std::end(r));
+            }
+        };
+
+        template <class R, class A>
+        struct sequence_forwarder
+            : sequence_forwarder_impl<R, A>
+        {
+        };
+
+        // Identity case: source and destination types match, so perfect
+        // forwarding avoids any copy or conversion.
+        template <class R>
+        struct sequence_forwarder<R, R>
+        {
+            template <class T>
+            static inline T&& forward(T&& t) noexcept
+            {
+                return std::forward<T>(t);
+            }
+        };
+
+        // Strip cv/ref qualifiers before comparing source and target types.
+        template <class R, class A>
+        using forwarder_type = detail::sequence_forwarder<
+            std::decay_t<R>,
+            std::remove_cv_t<std::remove_reference_t<A>>
+        >;
+    }
+
+    // Forward an lvalue sequence to type R (copy/convert only if needed).
+    template <class R, class A>
+    inline decltype(auto) forward_sequence(typename std::remove_reference<A>::type& s)
+    {
+        using forwarder = detail::forwarder_type<R, A>;
+        return forwarder::forward(std::forward<A>(s));
+    }
+
+    // Forward an rvalue sequence to type R; the static_assert prevents
+    // silently binding an rvalue when A was deduced as an lvalue reference.
+    template <class R, class A>
+    inline decltype(auto) forward_sequence(typename std::remove_reference<A>::type&& s)
+    {
+        using forwarder = detail::forwarder_type<R, A>;
+        static_assert(!std::is_lvalue_reference<A>::value,
+                      "Can not forward an rvalue as an lvalue.");
+        return forwarder::forward(std::move(s));
+    }
+
+    /********************************
+     * sequence_size implementation *
+     ********************************/
+
+    // equivalent to std::size(c) in c++17
+    template <class C>
+    constexpr auto sequence_size(const C& c) -> decltype(c.size())
+    {
+        return c.size();
+    }
+
+    // equivalent to std::size(a) in c++17
+    template <class T, std::size_t N>
+    constexpr std::size_t sequence_size(const T (&)[N])
+    {
+        return N;
+    }
+
+    /****************************
+     * are_equivalent_sequences *
+     ****************************/
+
+    // True when both sequences have the same length and equal elements
+    // (four-iterator std::equal handles differing lengths safely).
+    template <class E1, class E2>
+    inline bool are_equivalent_sequences(const E1& e1, const E2& e2)
+    {
+        return std::equal(e1.cbegin(), e1.cend(), e2.cbegin(), e2.cend());
+    }
+}
+
+#endif

+ 21 - 0
3rd/numpy/include/xtl/xspan.hpp

@@ -0,0 +1,21 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XSPAN_HPP
+#define XTL_XSPAN_HPP
+
+#include "xspan_impl.hpp"
+
+namespace xtl
+{
+	// Re-export the vendored tcb::span implementation under the xtl namespace.
+	using tcb::span;
+	constexpr std::ptrdiff_t dynamic_extent = tcb::dynamic_extent;
+}
+
+#endif

+ 779 - 0
3rd/numpy/include/xtl/xspan_impl.hpp

@@ -0,0 +1,779 @@
+// https://github.com/tcbrindle/span/blob/master/include/tcb/span.hpp
+// TCP SPAN @commit cd0c6d0
+
+/*
+This is an implementation of std::span from P0122R7
+http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0122r7.pdf
+*/
+
+//          Copyright Tristan Brindle 2018.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file ../../LICENSE_1_0.txt or copy at
+//          https://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef TCB_SPAN_HPP_INCLUDED
+#define TCB_SPAN_HPP_INCLUDED
+
+#include <array>
+#include <cstddef>
+#include <exception>
+#include <type_traits>
+
+#ifndef TCB_SPAN_NO_EXCEPTIONS
+// Attempt to discover whether we're being compiled with exception support
+#if !(defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND))
+#define TCB_SPAN_NO_EXCEPTIONS
+#endif
+#endif
+
+#ifndef TCB_SPAN_NO_EXCEPTIONS
+#include <cstdio>
+#include <stdexcept>
+#endif
+
+// Various feature test macros
+
+#ifndef TCB_SPAN_NAMESPACE_NAME
+#define TCB_SPAN_NAMESPACE_NAME tcb
+#endif
+
+#ifdef TCB_SPAN_STD_COMPLIANT_MODE
+#define TCB_SPAN_NO_DEPRECATION_WARNINGS
+#endif
+
+#ifndef TCB_SPAN_NO_DEPRECATION_WARNINGS
+#define TCB_SPAN_DEPRECATED_FOR(msg) [[deprecated(msg)]]
+#else
+#define TCB_SPAN_DEPRECATED_FOR(msg)
+#endif
+
+#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+#define TCB_SPAN_HAVE_CPP17
+#endif
+
+#if __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
+#define TCB_SPAN_HAVE_CPP14
+#endif
+
+namespace TCB_SPAN_NAMESPACE_NAME {
+
+// Establish default contract checking behavior
+#if !defined(TCB_SPAN_THROW_ON_CONTRACT_VIOLATION) &&                          \
+    !defined(TCB_SPAN_TERMINATE_ON_CONTRACT_VIOLATION) &&                      \
+    !defined(TCB_SPAN_NO_CONTRACT_CHECKING)
+#if defined(NDEBUG) || !defined(TCB_SPAN_HAVE_CPP14)
+#define TCB_SPAN_NO_CONTRACT_CHECKING
+#else
+#define TCB_SPAN_TERMINATE_ON_CONTRACT_VIOLATION
+#endif
+#endif
+
+#if defined(TCB_SPAN_THROW_ON_CONTRACT_VIOLATION)
+struct contract_violation_error : std::logic_error {
+    explicit contract_violation_error(const char* msg) : std::logic_error(msg)
+    {}
+};
+
+inline void contract_violation(const char* msg)
+{
+    throw contract_violation_error(msg);
+}
+
+#elif defined(TCB_SPAN_TERMINATE_ON_CONTRACT_VIOLATION)
+[[noreturn]] inline void contract_violation(const char* /*unused*/)
+{
+    std::terminate();
+}
+#endif
+
+#if !defined(TCB_SPAN_NO_CONTRACT_CHECKING)
+#define TCB_SPAN_STRINGIFY(cond) #cond
+#define TCB_SPAN_EXPECT(cond)                                                  \
+    cond ? (void) 0 : contract_violation("Expected " TCB_SPAN_STRINGIFY(cond))
+#else
+#define TCB_SPAN_EXPECT(cond)
+#endif
+
+#if defined(TCB_SPAN_HAVE_CPP17) || defined(__cpp_inline_variables)
+#define TCB_SPAN_INLINE_VAR inline
+#else
+#define TCB_SPAN_INLINE_VAR
+#endif
+
+#if defined(TCB_SPAN_HAVE_CPP14) ||                                                 \
+    (defined(__cpp_constexpr) && __cpp_constexpr >= 201304)
+#define TCB_SPAN_CONSTEXPR14 constexpr
+#else
+#define TCB_SPAN_CONSTEXPR14
+#endif
+
+#if defined(TCB_SPAN_NO_CONTRACT_CHECKING)
+#define TCB_SPAN_CONSTEXPR11 constexpr
+#else
+#define TCB_SPAN_CONSTEXPR11 TCB_SPAN_CONSTEXPR14
+#endif
+
+#if defined(TCB_SPAN_HAVE_CPP17) || defined(__cpp_deduction_guides)
+#define TCB_SPAN_HAVE_DEDUCTION_GUIDES
+#endif
+
+#if defined(TCB_SPAN_HAVE_CPP17) || defined(__cpp_lib_byte) && !(defined(_HAS_STD_BYTE) && !_HAS_STD_BYTE)
+#define TCB_SPAN_HAVE_STD_BYTE
+#endif
+
+#if defined(TCB_SPAN_HAVE_CPP17) || defined(__cpp_lib_array_constexpr)
+#define TCB_SPAN_HAVE_CONSTEXPR_STD_ARRAY_ETC
+#endif
+
+#if defined(TCB_SPAN_HAVE_CONSTEXPR_STD_ARRAY_ETC)
+#define TCB_SPAN_ARRAY_CONSTEXPR constexpr
+#else
+#define TCB_SPAN_ARRAY_CONSTEXPR
+#endif
+
+#ifdef TCB_SPAN_HAVE_STD_BYTE
+using byte = std::byte;
+#else
+using byte = unsigned char;
+#endif
+
+TCB_SPAN_INLINE_VAR constexpr std::ptrdiff_t dynamic_extent = -1;
+
+template <typename ElementType, std::ptrdiff_t Extent = dynamic_extent>
+class span;
+
+namespace detail {
+
+// Fixed-extent storage: the size is a compile-time constant, so only the
+// pointer occupies space; the runtime size argument is ignored.
+template <typename E, std::ptrdiff_t S>
+struct span_storage {
+    constexpr span_storage() noexcept = default;
+
+    // NOTE(review): takes std::ptrdiff_t here but the dynamic specialization
+    // below takes std::size_t — confirm this asymmetry is intentional.
+    constexpr span_storage(E* aptr, std::ptrdiff_t /*unused*/) noexcept
+        : ptr(aptr)
+    {}
+
+    E* ptr = nullptr;
+    static constexpr std::ptrdiff_t size = S;
+};
+
+// Dynamic-extent storage: the size is carried at run time.
+template <typename E>
+struct span_storage<E, dynamic_extent> {
+    constexpr span_storage() noexcept = default;
+
+    constexpr span_storage(E* aptr, std::size_t asize) noexcept
+        : ptr(aptr), size(asize)
+    {}
+
+    E* ptr = nullptr;
+    std::size_t size = 0;
+};
+
+// Reimplementation of C++17 std::size() and std::data()
+#if defined(TCB_SPAN_HAVE_CPP17) ||                                            \
+    defined(__cpp_lib_nonmember_container_access)
+using std::data;
+using std::size;
+#else
+template <class C>
+constexpr auto size(const C& c) -> decltype(c.size())
+{
+    return c.size();
+}
+
+template <class T, std::size_t N>
+constexpr std::size_t size(const T (&)[N]) noexcept
+{
+    return N;
+}
+
+template <class C>
+constexpr auto data(C& c) -> decltype(c.data())
+{
+    return c.data();
+}
+
+template <class C>
+constexpr auto data(const C& c) -> decltype(c.data())
+{
+    return c.data();
+}
+
+template <class T, std::size_t N>
+constexpr T* data(T (&array)[N]) noexcept
+{
+    return array;
+}
+
+template <class E>
+constexpr const E* data(std::initializer_list<E> il) noexcept
+{
+    return il.begin();
+}
+#endif // TCB_SPAN_HAVE_CPP17
+
+#if defined(TCB_SPAN_HAVE_CPP17) || defined(__cpp_lib_void_t)
+using std::void_t;
+#else
+template <typename...>
+using void_t = void;
+#endif
+
+template <typename T>
+using uncvref_t =
+    typename std::remove_cv<typename std::remove_reference<T>::type>::type;
+
+template <typename>
+struct is_span : std::false_type {};
+
+template <typename T, std::ptrdiff_t S>
+struct is_span<span<T, S>> : std::true_type {};
+
+template <typename>
+struct is_std_array : std::false_type {};
+
+template <typename T, std::size_t N>
+struct is_std_array<std::array<T, N>> : std::true_type {};
+
+template <typename, typename = void>
+struct has_size_and_data : std::false_type {};
+
+template <typename T>
+struct has_size_and_data<T, void_t<decltype(detail::size(std::declval<T>())),
+                                   decltype(detail::data(std::declval<T>()))>>
+    : std::true_type {};
+
+template <typename C, typename U = uncvref_t<C>>
+struct is_container {
+    static constexpr bool value =
+        !is_span<U>::value && !is_std_array<U>::value &&
+        !std::is_array<U>::value && has_size_and_data<C>::value;
+};
+
+template <typename T>
+using remove_pointer_t = typename std::remove_pointer<T>::type;
+
+template <typename, typename, typename = void>
+struct is_container_element_type_compatible : std::false_type {};
+
+template <typename T, typename E>
+struct is_container_element_type_compatible<
+    T, E, void_t<decltype(detail::data(std::declval<T>()))>>
+    : std::is_convertible<
+          remove_pointer_t<decltype(detail::data(std::declval<T>()))> (*)[],
+          E (*)[]> {};
+
+template <typename, typename = size_t>
+struct is_complete : std::false_type {};
+
+template <typename T>
+struct is_complete<T, decltype(sizeof(T))> : std::true_type {};
+
+} // namespace detail
+
+template <typename ElementType, std::ptrdiff_t Extent>
+class span {
+    static_assert(Extent == dynamic_extent || Extent >= 0,
+                  "A span must have an extent greater than or equal to zero, "
+                  "or a dynamic extent");
+    static_assert(std::is_object<ElementType>::value,
+                  "A span's ElementType must be an object type (not a "
+                  "reference type or void)");
+    static_assert(detail::is_complete<ElementType>::value,
+                  "A span's ElementType must be a complete type (not a forward "
+                  "declaration)");
+    static_assert(!std::is_abstract<ElementType>::value,
+                  "A span's ElementType cannot be an abstract class type");
+
+    using storage_type = detail::span_storage<ElementType, Extent>;
+
+public:
+    // constants and types
+    using element_type = ElementType;
+    using value_type = typename std::remove_cv<ElementType>::type;
+    using index_type = std::size_t;
+    using difference_type = std::ptrdiff_t;
+    using pointer = ElementType*;
+    using reference = ElementType&;
+    using iterator = pointer;
+    using const_iterator = const ElementType*;
+    using reverse_iterator = std::reverse_iterator<iterator>;
+    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+    static constexpr index_type extent = static_cast<index_type>(Extent);
+
+    // [span.cons], span constructors, copy, assignment, and destructor
+    template <std::ptrdiff_t E = Extent,
+              typename std::enable_if<E <= 0, int>::type = 0>
+    constexpr span() noexcept
+    {}
+
+    TCB_SPAN_CONSTEXPR11 span(pointer ptr, index_type count)
+        : storage_(ptr, count)
+    {
+        TCB_SPAN_EXPECT(extent == dynamic_extent || count == extent);
+    }
+
+    TCB_SPAN_CONSTEXPR11 span(pointer first_elem, pointer last_elem)
+        : storage_(first_elem, last_elem - first_elem)
+    {
+        TCB_SPAN_EXPECT(extent == dynamic_extent ||
+                        last_elem - first_elem == extent);
+    }
+
+    template <
+        std::size_t N, std::ptrdiff_t E = Extent,
+        typename std::enable_if<
+            (E == dynamic_extent || static_cast<std::ptrdiff_t>(N) == E) &&
+                detail::is_container_element_type_compatible<
+                    element_type (&)[N], ElementType>::value,
+            int>::type = 0>
+    constexpr span(element_type (&arr)[N]) noexcept : storage_(arr, N)
+    {}
+
+    template <
+        std::size_t N, std::ptrdiff_t E = Extent,
+        typename std::enable_if<
+            (E == dynamic_extent || static_cast<std::ptrdiff_t>(N) == E) &&
+                detail::is_container_element_type_compatible<
+                    std::array<value_type, N>&, ElementType>::value,
+            int>::type = 0>
+    TCB_SPAN_ARRAY_CONSTEXPR span(std::array<value_type, N>& arr) noexcept
+        : storage_(arr.data(), N)
+    {}
+
+    template <
+        std::size_t N, std::ptrdiff_t E = Extent,
+        typename std::enable_if<
+            (E == dynamic_extent || static_cast<std::ptrdiff_t>(N) == E) &&
+                detail::is_container_element_type_compatible<
+                    const std::array<value_type, N>&, ElementType>::value,
+            int>::type = 0>
+    TCB_SPAN_ARRAY_CONSTEXPR span(const std::array<value_type, N>& arr) noexcept
+        : storage_(arr.data(), N)
+    {}
+
+    template <typename Container,
+              typename std::enable_if<
+                  detail::is_container<Container>::value &&
+                      detail::is_container_element_type_compatible<
+                          Container&, ElementType>::value,
+                  int>::type = 0>
+    TCB_SPAN_CONSTEXPR11 span(Container& cont)
+        : storage_(detail::data(cont), detail::size(cont))
+    {
+        TCB_SPAN_EXPECT(extent == dynamic_extent ||
+                        static_cast<std::ptrdiff_t>(detail::size(cont)) ==
+                            extent);
+    }
+
+    template <typename Container,
+              typename std::enable_if<
+                  detail::is_container<Container>::value &&
+                      detail::is_container_element_type_compatible<
+                          const Container&, ElementType>::value,
+                  int>::type = 0>
+    TCB_SPAN_CONSTEXPR11 span(const Container& cont)
+        : storage_(detail::data(cont), detail::size(cont))
+    {
+        TCB_SPAN_EXPECT(extent == dynamic_extent ||
+                        static_cast<std::ptrdiff_t>(detail::size(cont)) ==
+                            extent);
+    }
+
+    constexpr span(const span& other) noexcept = default;
+
+    template <typename OtherElementType, std::ptrdiff_t OtherExtent,
+              typename std::enable_if<
+                  (Extent == OtherExtent || Extent == dynamic_extent) &&
+                      std::is_convertible<OtherElementType (*)[],
+                                          ElementType (*)[]>::value,
+                  int>::type = 0>
+    constexpr span(const span<OtherElementType, OtherExtent>& other) noexcept
+        : storage_(other.data(), other.size())
+    {}
+
+    ~span() noexcept = default;
+
+    span& operator=(const span& other) noexcept = default;
+
+    // [span.sub], span subviews
+    template <std::ptrdiff_t Count>
+    TCB_SPAN_CONSTEXPR11 span<element_type, Count> first() const
+    {
+        TCB_SPAN_EXPECT(Count >= 0 && Count <= size());
+        return {data(), Count};
+    }
+
+    template <std::ptrdiff_t Count>
+    TCB_SPAN_CONSTEXPR11 span<element_type, Count> last() const
+    {
+        TCB_SPAN_EXPECT(Count >= 0 && Count <= size());
+        return {data() + (size() - Count), Count};
+    }
+
+    template <std::ptrdiff_t Offset, std::ptrdiff_t Count = dynamic_extent>
+    using subspan_return_t =
+        span<ElementType, Count != dynamic_extent
+                              ? Count
+                              : (Extent != dynamic_extent ? Extent - Offset
+                                                          : dynamic_extent)>;
+
+    template <std::ptrdiff_t Offset, std::ptrdiff_t Count = dynamic_extent>
+    TCB_SPAN_CONSTEXPR11 subspan_return_t<Offset, Count> subspan() const
+    {
+        TCB_SPAN_EXPECT((Offset >= 0 && Offset <= size()) &&
+                        (Count == dynamic_extent ||
+                         (Count >= 0 && Offset + Count <= size())));
+        return {data() + Offset,
+                Count != dynamic_extent
+                    ? Count
+                    : (Extent != dynamic_extent ? Extent - Offset
+                                                : size() - Offset)};
+    }
+
+    TCB_SPAN_CONSTEXPR11 span<element_type, dynamic_extent>
+    first(index_type count) const
+    {
+        TCB_SPAN_EXPECT(count >= 0 && count <= size());
+        return {data(), count};
+    }
+
+    TCB_SPAN_CONSTEXPR11 span<element_type, dynamic_extent>
+    last(index_type count) const
+    {
+        TCB_SPAN_EXPECT(count >= 0 && count <= size());
+        return {data() + (size() - count), count};
+    }
+
+    TCB_SPAN_CONSTEXPR11 span<element_type, dynamic_extent>
+    subspan(index_type offset, index_type count = static_cast<index_type>(dynamic_extent)) const
+    {
+        TCB_SPAN_EXPECT((offset >= 0 && offset <= size()) &&
+                        (count == dynamic_extent ||
+                         (count >= 0 && offset + count <= size())));
+        return {data() + offset,
+                count == dynamic_extent ? size() - offset : count};
+    }
+
+    // [span.obs], span observers
+    // Number of elements viewed (compile-time constant for fixed extents).
+    constexpr index_type size() const noexcept { return storage_.size; }
+
+    // Size of the viewed range in bytes.
+    constexpr index_type size_bytes() const noexcept
+    {
+        return size() * sizeof(element_type);
+    }
+
+    constexpr bool empty() const noexcept { return size() == 0; }
+
+    // [span.elem], span element access
+    TCB_SPAN_CONSTEXPR11 reference operator[](index_type idx) const
+    {
+        TCB_SPAN_EXPECT(idx >= 0 && idx < size());
+        return *(data() + idx);
+    }
+
+    /* Extension: not in P0122 */
+#ifndef TCB_SPAN_STD_COMPLIANT_MODE
+    TCB_SPAN_CONSTEXPR14 reference at(index_type idx) const
+    {
+#ifndef TCB_SPAN_NO_EXCEPTIONS
+        if (idx < 0 || idx >= size()) {
+            char msgbuf[64] = {
+                0,
+            };
+            std::snprintf(msgbuf, sizeof(msgbuf),
+                          "Index %td is out of range for span of size %td", idx,
+                          size());
+            throw std::out_of_range{msgbuf};
+        }
+#endif // TCB_SPAN_NO_EXCEPTIONS
+        return this->operator[](idx);
+    }
+
+    TCB_SPAN_CONSTEXPR11 reference front() const
+    {
+        TCB_SPAN_EXPECT(!empty());
+        return *data();
+    }
+
+    TCB_SPAN_CONSTEXPR11 reference back() const
+    {
+        TCB_SPAN_EXPECT(!empty());
+        return *(data() + (size() - 1));
+    }
+
+#endif // TCB_SPAN_STD_COMPLIANT_MODE
+
+#ifndef TCB_SPAN_NO_FUNCTION_CALL_OPERATOR
+    TCB_SPAN_DEPRECATED_FOR("Use operator[] instead")
+    constexpr reference operator()(index_type idx) const
+    {
+        return this->operator[](idx);
+    }
+#endif // TCB_SPAN_NO_FUNCTION_CALL_OPERATOR
+
+    constexpr pointer data() const noexcept { return storage_.ptr; }
+
+    // [span.iterators], span iterator support
+    constexpr iterator begin() const noexcept { return data(); }
+
+    constexpr iterator end() const noexcept { return data() + size(); }
+
+    constexpr const_iterator cbegin() const noexcept { return begin(); }
+
+    constexpr const_iterator cend() const noexcept { return end(); }
+
+    TCB_SPAN_ARRAY_CONSTEXPR reverse_iterator rbegin() const noexcept
+    {
+        return reverse_iterator(end());
+    }
+
+    TCB_SPAN_ARRAY_CONSTEXPR reverse_iterator rend() const noexcept
+    {
+        return reverse_iterator(begin());
+    }
+
+    TCB_SPAN_ARRAY_CONSTEXPR const_reverse_iterator crbegin() const noexcept
+    {
+        return const_reverse_iterator(cend());
+    }
+
+    TCB_SPAN_ARRAY_CONSTEXPR const_reverse_iterator crend() const noexcept
+    {
+        return const_reverse_iterator(cbegin());
+    }
+
+private:
+    storage_type storage_{};
+};
+
+#ifdef TCB_SPAN_HAVE_DEDUCTION_GUIDES
+
+/* Deduction Guides */
+template <class T, size_t N>
+span(T (&)[N])->span<T, N>;
+
+template <class T, size_t N>
+span(std::array<T, N>&)->span<T, N>;
+
+template <class T, size_t N>
+span(const std::array<T, N>&)->span<const T, N>;
+
+template <class Container>
+span(Container&)->span<typename Container::value_type>;
+
+template <class Container>
+span(const Container&)->span<const typename Container::value_type>;
+
+#endif // TCB_HAVE_DEDUCTION_GUIDES
+
+template <typename ElementType, std::ptrdiff_t Extent>
+constexpr span<ElementType, Extent>
+make_span(span<ElementType, Extent> s) noexcept
+{
+    return s;
+}
+
+#define AS_SIGNED(N) static_cast<std::ptrdiff_t>(N)
+
+template <typename T, std::size_t N>
+constexpr span<T, AS_SIGNED(N)> make_span(T (&arr)[N]) noexcept
+{
+    return {arr};
+}
+
+template <typename T, std::size_t N>
+TCB_SPAN_ARRAY_CONSTEXPR span<T, AS_SIGNED(N)> make_span(std::array<T, N>& arr) noexcept
+{
+    return {arr};
+}
+
+template <typename T, std::size_t N>
+TCB_SPAN_ARRAY_CONSTEXPR span<const T, AS_SIGNED(N)>
+make_span(const std::array<T, N>& arr) noexcept
+{
+    return {arr};
+}
+
+#undef AS_SIGNED
+
+template <typename Container>
+constexpr span<typename Container::value_type> make_span(Container& cont)
+{
+    return {cont};
+}
+
+template <typename Container>
+constexpr span<const typename Container::value_type>
+make_span(const Container& cont)
+{
+    return {cont};
+}
+
+/* Comparison operators */
+// Implementation note: the implementations of == and < are equivalent to
+// 4-legged std::equal and std::lexicographical_compare respectively
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator==(span<T, X> lhs, span<U, Y> rhs)
+{
+    if (lhs.size() != rhs.size()) {
+        return false;
+    }
+
+    for (std::ptrdiff_t i = 0; i < lhs.size(); i++) {
+        if (lhs[i] != rhs[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator!=(span<T, X> lhs, span<U, Y> rhs)
+{
+    return !(lhs == rhs);
+}
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator<(span<T, X> lhs, span<U, Y> rhs)
+{
+    // No std::min to avoid dragging in <algorithm>
+    const std::ptrdiff_t size =
+        lhs.size() < rhs.size() ? lhs.size() : rhs.size();
+
+    for (std::ptrdiff_t i = 0; i < size; i++) {
+        if (lhs[i] < rhs[i]) {
+            return true;
+        }
+        if (lhs[i] > rhs[i]) {
+            return false;
+        }
+    }
+    return lhs.size() < rhs.size();
+}
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator<=(span<T, X> lhs, span<U, Y> rhs)
+{
+    return !(rhs < lhs);
+}
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator>(span<T, X> lhs, span<U, Y> rhs)
+{
+    return rhs < lhs;
+}
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator>=(span<T, X> lhs, span<U, Y> rhs)
+{
+    return !(lhs < rhs);
+}
+
+template <typename ElementType, std::ptrdiff_t Extent>
+span<const byte, ((Extent == dynamic_extent)
+                      ? dynamic_extent
+                      : (static_cast<ptrdiff_t>(sizeof(ElementType)) * Extent))>
+as_bytes(span<ElementType, Extent> s) noexcept
+{
+    return {reinterpret_cast<const byte*>(s.data()), s.size_bytes()};
+}
+
+template <
+    class ElementType, ptrdiff_t Extent,
+    typename std::enable_if<!std::is_const<ElementType>::value, int>::type = 0>
+span<byte, ((Extent == dynamic_extent)
+                ? dynamic_extent
+                : (static_cast<ptrdiff_t>(sizeof(ElementType)) * Extent))>
+as_writable_bytes(span<ElementType, Extent> s) noexcept
+{
+    return {reinterpret_cast<byte*>(s.data()), s.size_bytes()};
+}
+
+/* Extension: nonmember subview operations */
+
+#ifndef TCB_SPAN_STD_COMPLIANT_MODE
+
+template <std::ptrdiff_t Count, typename T>
+TCB_SPAN_CONSTEXPR11 auto first(T& t)
+    -> decltype(make_span(t).template first<Count>())
+{
+    return make_span(t).template first<Count>();
+}
+
+template <std::ptrdiff_t Count, typename T>
+TCB_SPAN_CONSTEXPR11 auto last(T& t)
+    -> decltype(make_span(t).template last<Count>())
+{
+    return make_span(t).template last<Count>();
+}
+
+template <std::ptrdiff_t Offset, std::ptrdiff_t Count = dynamic_extent,
+          typename T>
+TCB_SPAN_CONSTEXPR11 auto subspan(T& t)
+    -> decltype(make_span(t).template subspan<Offset, Count>())
+{
+    return make_span(t).template subspan<Offset, Count>();
+}
+
+template <typename T>
+TCB_SPAN_CONSTEXPR11 auto first(T& t, std::ptrdiff_t count)
+    -> decltype(make_span(t).first(count))
+{
+    return make_span(t).first(count);
+}
+
+template <typename T>
+TCB_SPAN_CONSTEXPR11 auto last(T& t, std::ptrdiff_t count)
+    -> decltype(make_span(t).last(count))
+{
+    return make_span(t).last(count);
+}
+
+template <typename T>
+TCB_SPAN_CONSTEXPR11 auto subspan(T& t, std::ptrdiff_t offset,
+                                  std::ptrdiff_t count = dynamic_extent)
+    -> decltype(make_span(t).subspan(offset, count))
+{
+    return make_span(t).subspan(offset, count);
+}
+
+#endif // TCB_SPAN_STD_COMPLIANT_MODE
+
+} // namespace TCB_SPAN_NAMESPACE_NAME
+
+/* Extension: support for C++17 structured bindings */
+
+#ifndef TCB_SPAN_STD_COMPLIANT_MODE
+
+namespace TCB_SPAN_NAMESPACE_NAME {
+
+template <std::ptrdiff_t N, typename E, std::ptrdiff_t S>
+constexpr auto get(span<E, S> s) -> decltype(s[N])
+{
+    return s[N];
+}
+
+} // namespace TCB_SPAN_NAMESPACE_NAME
+
+namespace std {
+
+template <typename E, ptrdiff_t S>
+class tuple_size<tcb::span<E, S>> : public integral_constant<size_t, static_cast<size_t>(S)> {};
+
+template <typename E>
+class tuple_size<tcb::span<E, tcb::dynamic_extent>>; // not defined
+
+template <size_t N, typename E, ptrdiff_t S>
+class tuple_element<N, tcb::span<E, S>> {
+public:
+    using type = E;
+};
+
+} // end namespace std
+
+#endif // TCB_SPAN_STD_COMPLIANT_MODE
+
+#endif // TCB_SPAN_HPP_INCLUDED

+ 125 - 0
3rd/numpy/include/xtl/xsystem.hpp

@@ -0,0 +1,125 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XSYSTEM_HPP
+#define XTL_XSYSTEM_HPP
+
+#if defined(__linux__)
+#  include <unistd.h>
+#endif
+#if defined(_WIN32)
+#  if defined(NOMINMAX)
+#    include <windows.h>
+#  else
+#    define NOMINMAX
+#    include <windows.h>
+#    undef NOMINMAX
+#  endif
+#endif
+#ifdef __APPLE__
+#  include <cstdint>
+#  include <mach-o/dyld.h>
+#endif
+#if defined(__sun)
+#  include <stdlib.h>
+#endif
+#ifdef __FreeBSD__
+#  include <sys/types.h>
+#  include <sys/sysctl.h>
+#endif
+
+#include <cstring>
+#include <string>
+
+namespace xtl
+{
+    std::string executable_path();
+    std::string prefix_path();
+
+    /******************
+     * implementation *
+     ******************/
+    
+    inline std::string executable_path()
+    {
+        std::string path;
+#if defined(UNICODE)
+    wchar_t buffer[1024];
+#else
+    char buffer[1024];
+#endif
+        std::memset(buffer, '\0', sizeof(buffer));
+#if defined(__linux__)
+        if (readlink("/proc/self/exe", buffer, sizeof(buffer)) != -1)
+        {
+            path = buffer;
+        }
+        else
+        {
+            // failed to determine run path
+        }
+#elif defined (_WIN32)
+    #if defined(UNICODE)
+        if (GetModuleFileNameW(nullptr, buffer, sizeof(buffer)) != 0)
+        {
+            // Convert wchar_t to std::string
+            std::wstring wideString(buffer);
+            std::string narrowString(wideString.begin(), wideString.end());
+            path = narrowString;
+        }
+    #else
+        if (GetModuleFileNameA(nullptr, buffer, sizeof(buffer)) != 0)
+        {
+            path = buffer;
+        }
+    #endif
+        // failed to determine run path
+#elif defined (__APPLE__)
+        std::uint32_t size = sizeof(buffer);
+        if(_NSGetExecutablePath(buffer, &size) == 0)
+        {
+            path = buffer;
+        }
+        else
+        {
+            // failed to determine run path
+        }
+#elif defined (__FreeBSD__)
+        int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+        size_t buffer_size = sizeof(buffer);
+        if (sysctl(mib, 4, buffer, &buffer_size, NULL, 0) != -1)
+        {
+            path = buffer;
+        }
+        else
+        {
+            // failed to determine run path
+        }
+#elif defined(__sun)
+        path = getexecname();
+#endif
+        return path;
+    }
+
+    inline std::string prefix_path()
+    {
+        std::string path = executable_path();
+#if defined (_WIN32)
+        char separator = '\\';
+#else
+        char separator = '/';
+#endif
+        std::string bin_folder = path.substr(0, path.find_last_of(separator));
+        std::string prefix = bin_folder.substr(0, bin_folder.find_last_of(separator)) + separator;
+        return prefix;
+    }
+}
+
+#endif
+

+ 44 - 0
3rd/numpy/include/xtl/xtl_config.hpp

@@ -0,0 +1,44 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_CONFIG_HPP
+#define XTL_CONFIG_HPP
+
+#define XTL_VERSION_MAJOR 0
+#define XTL_VERSION_MINOR 7
+#define XTL_VERSION_PATCH 7
+
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+// Attempt to discover whether we're being compiled with exception support
+#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(XTL_NO_EXCEPTIONS)
+// Exceptions are enabled.
+#else
+#if !defined(XTL_NO_EXCEPTIONS)
+// Exceptions are disabled.
+#define XTL_NO_EXCEPTIONS
+#endif
+#endif
+
+#if defined(XTL_NO_EXCEPTIONS)
+
+#include <iostream>
+
+#define XTL_THROW(_, msg)              \
+    {                                  \
+        std::cerr << msg << std::endl; \
+        std::abort();                  \
+    }
+#else
+#define XTL_THROW(exception, msg) throw exception(msg)
+#endif
+
+#endif

+ 458 - 0
3rd/numpy/include/xtl/xtype_traits.hpp

@@ -0,0 +1,458 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_TYPE_TRAITS_HPP
+#define XTL_TYPE_TRAITS_HPP
+
+#include <complex>
+#include <chrono>
+#include <type_traits>
+
+#include "xtl_config.hpp"
+
+namespace xtl
+{
+    /************************************
+     * std proxy traits                 *
+     ************************************/
+
+    template <class T>
+    struct is_scalar : std::is_scalar<T>
+    {
+    };
+
+    template <class T>
+    struct is_arithmetic : std::is_arithmetic<T>
+    {
+    };
+
+    template <class T>
+    struct is_fundamental : std::is_fundamental<T>
+    {
+    };
+
+    template <class T>
+    struct is_signed : std::is_signed<T>
+    {
+    };
+
+    template <class T>
+    struct is_floating_point : std::is_floating_point<T>
+    {
+    };
+
+    template <class T>
+    struct is_integral : std::is_integral<T>
+    {
+    };
+
+    /************************************
+     * arithmetic type promotion traits *
+     ************************************/
+
+    /**
+     * Traits class for the result type of mixed arithmetic expressions.
+     * For example, <tt>promote_type<unsigned char, unsigned char>::type</tt> tells
+     * the user that <tt>unsigned char + unsigned char => int</tt>.
+     */
+    template <class... T>
+    struct promote_type;
+
+    template <>
+    struct promote_type<>
+    {
+        using type = void;
+    };
+
+    template <class T>
+    struct promote_type<T>
+    {
+        using type = typename promote_type<T, T>::type;
+    };
+
+    template <class C, class D1, class D2>
+    struct promote_type<std::chrono::time_point<C, D1>, std::chrono::time_point<C, D2>>
+    {
+        using type = std::chrono::time_point<C, typename promote_type<D1, D2>::type>;
+    };
+
+    template <class T0, class T1>
+    struct promote_type<T0, T1>
+    {
+        using type = decltype(std::declval<std::decay_t<T0>>() + std::declval<std::decay_t<T1>>());
+    };
+
+    template <>
+    struct promote_type<bool>
+    {
+        using type = bool;
+    };
+
+    template <class T>
+    struct promote_type<bool, T>
+    {
+        using type = T;
+    };
+
+    template <class T>
+    struct promote_type<bool, std::complex<T>>
+    {
+        using type = std::complex<T>;
+    };
+
+    template <class T1, class T2>
+    struct promote_type<T1, std::complex<T2>>
+    {
+        using type = std::complex<typename promote_type<T1, T2>::type>;
+    };
+
+    template <class T1, class T2>
+    struct promote_type<std::complex<T1>, T2>
+        : promote_type<T2, std::complex<T1>>
+    {
+    };
+
+    template <class T>
+    struct promote_type<std::complex<T>, std::complex<T>>
+    {
+        using type = std::complex<T>;
+    };
+
+    template <class T1, class T2>
+    struct promote_type<std::complex<T1>, std::complex<T2>>
+    {
+        using type = std::complex<typename promote_type<T1, T2>::type>;
+    };
+
+    template <class T, class... REST>
+    struct promote_type<T, REST...>
+    {
+        using type = typename promote_type<T, typename promote_type<REST...>::type>::type;
+    };
+
+    template <class T0, class T1, class... REST>
+    struct promote_type<std::complex<T0>, std::complex<T1>, REST...>
+    {
+        using type = std::complex<typename promote_type<T0, T1, REST...>::type>;
+    };
+
+    /**
+     * Abbreviation of 'typename promote_type<T>::type'.
+     */
+    template <class... T>
+    using promote_type_t = typename promote_type<T...>::type;
+
+    /**
+     * Traits class to find the biggest type of the same kind.
+     *
+     * For example, <tt>big_promote_type<unsigned char>::type</tt> is <tt>unsigned long long</tt>.
+     * The default implementation only supports built-in types and <tt>std::complex</tt>. All
+     * other types remain unchanged unless <tt>big_promote_type</tt> gets specialized for them.
+     */
+    template <class T>
+    struct big_promote_type
+    {
+    private:
+
+        using V = std::decay_t<T>;
+        static constexpr bool is_arithmetic = xtl::is_arithmetic<V>::value;
+        static constexpr bool is_signed = xtl::is_signed<V>::value;
+        static constexpr bool is_integral = xtl::is_integral<V>::value;
+        static constexpr bool is_long_double = std::is_same<V, long double>::value;
+
+    public:
+
+        using type = std::conditional_t<is_arithmetic,
+                        std::conditional_t<is_integral,
+                            std::conditional_t<is_signed, long long, unsigned long long>,
+                            std::conditional_t<is_long_double, long double, double>
+                        >,
+                        V
+                     >;
+    };
+
+    template <class T>
+    struct big_promote_type<std::complex<T>>
+    {
+        using type = std::complex<typename big_promote_type<T>::type>;
+    };
+
+    /**
+     * Abbreviation of 'typename big_promote_type<T>::type'.
+     */
+    template <class T>
+    using big_promote_type_t = typename big_promote_type<T>::type;
+
+    namespace traits_detail
+    {
+        using std::sqrt;
+
+        template <class T>
+        using real_promote_type_t = decltype(sqrt(std::declval<std::decay_t<T>>()));
+    }
+
+    /**
+     * Result type of algebraic expressions.
+     *
+     * For example, <tt>real_promote_type<int>::type</tt> tells the
+     * user that <tt>sqrt(int) => double</tt>.
+     */
+    template <class T>
+    struct real_promote_type
+    {
+        using type = traits_detail::real_promote_type_t<T>;
+    };
+
+    /**
+     * Abbreviation of 'typename real_promote_type<T>::type'.
+     */
+    template <class T>
+    using real_promote_type_t = typename real_promote_type<T>::type;
+
+    /**
+     * Traits class to replace 'bool' with 'uint8_t' and keep everything else.
+     *
+     * This is useful for scientific computing, where a boolean mask array is
+     * usually implemented as an array of bytes.
+     */
+    template <class T>
+    struct bool_promote_type
+    {
+        using type = typename std::conditional<std::is_same<T, bool>::value, uint8_t, T>::type;
+    };
+
+    /**
+     * Abbreviation for typename bool_promote_type<T>::type
+     */
+    template <class T>
+    using bool_promote_type_t = typename bool_promote_type<T>::type;
+
+    /************
+     * apply_cv *
+     ************/
+
+    namespace detail
+    {
+        template <class T, class U, bool = std::is_const<std::remove_reference_t<T>>::value,
+                  bool = std::is_volatile<std::remove_reference_t<T>>::value>
+        struct apply_cv_impl
+        {
+            using type = U;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T, U, true, false>
+        {
+            using type = const U;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T, U, false, true>
+        {
+            using type = volatile U;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T, U, true, true>
+        {
+            using type = const volatile U;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T&, U, false, false>
+        {
+            using type = U&;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T&, U, true, false>
+        {
+            using type = const U&;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T&, U, false, true>
+        {
+            using type = volatile U&;
+        };
+
+        template <class T, class U>
+        struct apply_cv_impl<T&, U, true, true>
+        {
+            using type = const volatile U&;
+        };
+    }
+
+    template <class T, class U>
+    struct apply_cv
+    {
+        using type = typename detail::apply_cv_impl<T, U>::type;
+    };
+
+    template <class T, class U>
+    using apply_cv_t = typename apply_cv<T, U>::type;
+
+    /****************************************************************
+     * C++17 logical operators (disjunction, conjunction, negation) *
+     ****************************************************************/
+
+    /********************
+     * disjunction - or *
+     ********************/
+
+    template <class...>
+    struct disjunction;
+
+    template <>
+    struct disjunction<> : std::false_type
+    {
+    };
+
+    template <class Arg>
+    struct disjunction<Arg> : Arg
+    {
+    };
+
+    template <class Arg1, class Arg2, class... Args>
+    struct disjunction<Arg1, Arg2, Args...> : std::conditional_t<Arg1::value, Arg1, disjunction<Arg2, Args...>>
+    {
+    };
+
+    /*********************
+     * conjunction - and *
+     *********************/
+
+    template <class...>
+    struct conjunction;
+
+    template <>
+    struct conjunction<> : std::true_type
+    {
+    };
+
+    template <class Arg1>
+    struct conjunction<Arg1> : Arg1
+    {
+    };
+
+    template <class Arg1, class Arg2, class... Args>
+    struct conjunction<Arg1, Arg2, Args...> : std::conditional_t<Arg1::value, conjunction<Arg2, Args...>, Arg1>
+    {
+    };
+
+    /******************
+     * negation - not *
+     ******************/
+
+    template <class Arg>
+    struct negation : std::integral_constant<bool, !Arg::value>
+    {
+    };
+
+    /************
+     * concepts *
+     ************/
+
+#if !defined(__GNUC__) || (defined(__GNUC__) && (__GNUC__ >= 5))
+
+    template <class... C>
+    constexpr bool xtl_requires = conjunction<C...>::value;
+
+    template <class... C>
+    constexpr bool either = disjunction<C...>::value;
+
+    template <class... C>
+    constexpr bool disallow = xtl::negation<xtl::conjunction<C...>>::value;
+
+    template <class... C>
+    constexpr bool disallow_one = xtl::negation<xtl::disjunction<C...>>::value;
+
+    template <class... C>
+    using check_requires = std::enable_if_t<xtl_requires<C...>, int>;
+
+    template <class... C>
+    using check_either = std::enable_if_t<either<C...>, int>;
+
+    template <class... C>
+    using check_disallow = std::enable_if_t<disallow<C...>, int>;
+
+    template <class... C>
+    using check_disallow_one = std::enable_if_t<disallow_one<C...>, int>;
+
+#else
+
+    template <class... C>
+    using check_requires = std::enable_if_t<conjunction<C...>::value, int>;
+
+    template <class... C>
+    using check_either = std::enable_if_t<disjunction<C...>::value, int>;
+
+    template <class... C>
+    using check_disallow = std::enable_if_t<xtl::negation<xtl::conjunction<C...>>::value, int>;
+
+    template <class... C>
+    using check_disallow_one = std::enable_if_t<xtl::negation<xtl::disjunction<C...>>::value, int>;
+
+#endif
+
+#define XTL_REQUIRES_IMPL(...) xtl::check_requires<__VA_ARGS__>
+#define XTL_REQUIRES(...) XTL_REQUIRES_IMPL(__VA_ARGS__) = 0
+
+#define XTL_EITHER_IMPL(...) xtl::check_either<__VA_ARGS__>
+#define XTL_EITHER(...) XTL_EITHER_IMPL(__VA_ARGS__) = 0
+
+#define XTL_DISALLOW_IMPL(...) xtl::check_disallow<__VA_ARGS__>
+#define XTL_DISALLOW(...) XTL_DISALLOW_IMPL(__VA_ARGS__) = 0
+
+#define XTL_DISALLOW_ONE_IMPL(...) xtl::check_disallow_one<__VA_ARGS__>
+#define XTL_DISALLOW_ONE(...) XTL_DISALLOW_ONE_IMPL(__VA_ARGS__) = 0
+
+    // For backward compatibility
+    template <class... C>
+    using check_concept = check_requires<C...>;
+
+    /**************
+     * all_scalar *
+     **************/
+
+    template <class... Args>
+    struct all_scalar : conjunction<xtl::is_scalar<Args>...>
+    {
+    };
+
+    /************
+     * constify *
+     ************/
+
+    // Adds const to the underlying type of a reference or pointer, or to the type itself
+    // if it's not a reference nor a pointer
+
+    template <class T>
+    struct constify
+    {
+        using type = std::add_const_t<T>;
+    };
+
+    template <class T>
+    struct constify<T*>
+    {
+        using type = std::add_const_t<T>*;
+    };
+
+    template <class T>
+    struct constify<T&>
+    {
+        using type = std::add_const_t<T>&;
+    };
+
+    template <class T>
+    using constify_t = typename constify<T>::type;
+}
+
+#endif

+ 206 - 0
3rd/numpy/include/xtl/xvariant.hpp

@@ -0,0 +1,206 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XVARIANT_HPP
+#define XTL_XVARIANT_HPP
+
+#include "xvariant_impl.hpp"
+#include "xclosure.hpp"
+#include "xmeta_utils.hpp"
+
+namespace xtl
+{
+    using mpark::variant;
+    using mpark::monostate;
+    using mpark::bad_variant_access;
+    using mpark::variant_size;
+#ifdef MPARK_VARIABLE_TEMPLATES
+    using mpark::variant_size_v;
+#endif
+    using mpark::variant_alternative;
+    using mpark::variant_alternative_t;
+    using mpark::variant_npos;
+
+    using mpark::visit;
+    using mpark::holds_alternative;
+    using mpark::get;
+    using mpark::get_if;
+
+    namespace detail
+    {
+        template <class T>
+        struct xgetter
+        {
+            template <class... Ts>
+            static constexpr T& get(xtl::variant<Ts...>& v)
+            {
+                return xtl::get<T>(v);
+            }
+
+            template <class... Ts>
+            static constexpr T&& get(xtl::variant<Ts...>&& v)
+            {
+                return xtl::get<T>(std::move(v));
+            }
+
+            template <class... Ts>
+            static constexpr const T& get(const xtl::variant<Ts...>& v)
+            {
+                return xtl::get<T>(v);
+            }
+
+            template <class... Ts>
+            static constexpr const T&& get(const xtl::variant<Ts...>&& v)
+            {
+                return xtl::get<T>(std::move(v));
+            }
+        };
+
+        template <class T>
+        struct xgetter<T&>
+        {
+            template <class... Ts>
+            static constexpr T& get(xtl::variant<Ts...>& v)
+            {
+                return xtl::get<xtl::xclosure_wrapper<T&>>(v).get();
+            }
+
+            template <class... Ts>
+            static constexpr T& get(xtl::variant<Ts...>&& v)
+            {
+                return xtl::get<xtl::xclosure_wrapper<T&>>(std::move(v)).get();
+            }
+
+            template <class... Ts>
+            static constexpr const T& get(const xtl::variant<Ts...>& v)
+            {
+                return xtl::get<xtl::xclosure_wrapper<T&>>(v).get();
+            }
+
+            template <class... Ts>
+            static constexpr const T& get(const xtl::variant<Ts...>&& v)
+            {
+                return xtl::get<xtl::xclosure_wrapper<T&>>(std::move(v)).get();
+            }
+        };
+
+        template <class T>
+        struct xgetter<const T&>
+        {
+            template <class... Ts>
+            static constexpr const T& get(const xtl::variant<Ts...>& v)
+            {
+                using cl_type = xtl::xclosure_wrapper<const T&>;
+                return get_impl(v, xtl::mpl::contains<xtl::mpl::vector<Ts...>, cl_type>());
+            }
+
+            template <class... Ts>
+            static constexpr const T& get(const xtl::variant<Ts...>&& v)
+            {
+                using cl_type = xtl::xclosure_wrapper<const T&>;
+                return get_impl(std::move(v), xtl::mpl::contains<xtl::mpl::vector<Ts...>, cl_type>());
+            }
+
+            template <class... Ts>
+            static constexpr const T& get(xtl::variant<Ts...>& v)
+            {
+                return get(static_cast<const xtl::variant<Ts...>&>(v));
+            }
+
+            template <class... Ts>
+            static constexpr const T& get(xtl::variant<Ts...>&& v)
+            {
+                return get(static_cast<const xtl::variant<Ts...>&&>(v));
+            }
+
+        private:
+
+            template <class... Ts>
+            static constexpr const T& get_impl(const xtl::variant<Ts...>& v, xtl::mpl::bool_<true>)
+            {
+                return xtl::get<xtl::xclosure_wrapper<const T&>>(v).get();
+            }
+
+            template <class... Ts>
+            static constexpr const T& get_impl(const xtl::variant<Ts...>& v, xtl::mpl::bool_<false>)
+            {
+                return static_cast<const xtl::xclosure_wrapper<T&>&>(xtl::get<xtl::xclosure_wrapper<T&>>(v)).get();
+            }
+
+            template <class... Ts>
+            static constexpr const T& get_impl(const xtl::variant<Ts...>&& v, xtl::mpl::bool_<true>)
+            {
+                return xtl::get<xtl::closure_wrapper<const T&>>(std::move(v)).get();
+            }
+
+            template <class... Ts>
+            static constexpr const T& get_impl(const xtl::variant<Ts...>&& v, xtl::mpl::bool_<false>)
+            {
+                return static_cast<const xtl::xclosure_wrapper<T&>&&>(xtl::get<xtl::xclosure_wrapper<T&>>(std::move(v))).get();
+            }
+        };
+    }
+
+    template <class T, class... Ts>
+    constexpr decltype(auto) xget(xtl::variant<Ts...>& v)
+    {
+        return detail::xgetter<T>::get(v);
+    }
+
+    template <class T, class... Ts>
+    constexpr decltype(auto) xget(xtl::variant<Ts...>&& v)
+    {
+        return detail::xgetter<T>::get(std::move(v));
+    }
+
+    template <class T, class... Ts>
+    constexpr decltype(auto) xget(const xtl::variant<Ts...>& v)
+    {
+        return detail::xgetter<T>::get(v);
+    }
+
+    template <class T, class... Ts>
+    constexpr decltype(auto) xget(const xtl::variant<Ts...>&& v)
+    {
+        return detail::xgetter<T>::get(std::move(v));
+    }
+
+    /************************
+     * overload for lambdas *
+     ************************/
+
+    // This hierarchy is required since ellipsis in using declarations are not supported until C++17
+    template <class... Ts>
+    struct overloaded;
+
+    template <class T>
+    struct overloaded<T> : T
+    {
+        overloaded(T arg) : T(arg) {}
+        using T::operator();
+    };
+
+    template <class T1, class T2, class... Ts>
+    struct overloaded<T1, T2, Ts...> : T1, overloaded<T2, Ts...>
+    {
+        template <class... Us>
+        overloaded(T1 t1, T2 t2, Us... args) : T1(t1), overloaded<T2, Ts...>(t2, args...) {}
+
+        using T1::operator();
+        using overloaded<T2, Ts...>::operator();
+    };
+
+    template <class... Ts>
+    inline overloaded<Ts...> make_overload(Ts... arg)
+    {
+        return overloaded<Ts...>{arg...};
+    }
+}
+
+#endif

+ 2818 - 0
3rd/numpy/include/xtl/xvariant_impl.hpp

@@ -0,0 +1,2818 @@
+// MPark.Variant
+//
+// Copyright Michael Park, 2015-2017
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.md or copy at http://boost.org/LICENSE_1_0.txt)
+
+// We cannot keep the header guard from mpark variant because
+// this can conflict with other libraries that depend on or embed
+// mpark variant.
+#ifndef XTL_MPARK_VARIANT_HPP
+#define XTL_MPARK_VARIANT_HPP
+
+/*
+   variant synopsis
+
+namespace std {
+
+  // 20.7.2, class template variant
+  template <class... Types>
+  class variant {
+  public:
+
+    // 20.7.2.1, constructors
+    constexpr variant() noexcept(see below);
+    variant(const variant&);
+    variant(variant&&) noexcept(see below);
+
+    template <class T> constexpr variant(T&&) noexcept(see below);
+
+    template <class T, class... Args>
+    constexpr explicit variant(in_place_type_t<T>, Args&&...);
+
+    template <class T, class U, class... Args>
+    constexpr explicit variant(
+        in_place_type_t<T>, initializer_list<U>, Args&&...);
+
+    template <size_t I, class... Args>
+    constexpr explicit variant(in_place_index_t<I>, Args&&...);
+
+    template <size_t I, class U, class... Args>
+    constexpr explicit variant(
+        in_place_index_t<I>, initializer_list<U>, Args&&...);
+
+    // 20.7.2.2, destructor
+    ~variant();
+
+    // 20.7.2.3, assignment
+    variant& operator=(const variant&);
+    variant& operator=(variant&&) noexcept(see below);
+
+    template <class T> variant& operator=(T&&) noexcept(see below);
+
+    // 20.7.2.4, modifiers
+    template <class T, class... Args>
+    T& emplace(Args&&...);
+
+    template <class T, class U, class... Args>
+    T& emplace(initializer_list<U>, Args&&...);
+
+    template <size_t I, class... Args>
+    variant_alternative<I, variant>& emplace(Args&&...);
+
+    template <size_t I, class U, class...  Args>
+    variant_alternative<I, variant>& emplace(initializer_list<U>, Args&&...);
+
+    // 20.7.2.5, value status
+    constexpr bool valueless_by_exception() const noexcept;
+    constexpr size_t index() const noexcept;
+
+    // 20.7.2.6, swap
+    void swap(variant&) noexcept(see below);
+  };
+
+  // 20.7.3, variant helper classes
+  template <class T> struct variant_size; // undefined
+
+  template <class T>
+  constexpr size_t variant_size_v = variant_size<T>::value;
+
+  template <class T> struct variant_size<const T>;
+  template <class T> struct variant_size<volatile T>;
+  template <class T> struct variant_size<const volatile T>;
+
+  template <class... Types>
+  struct variant_size<variant<Types...>>;
+
+  template <size_t I, class T> struct variant_alternative; // undefined
+
+  template <size_t I, class T>
+  using variant_alternative_t = typename variant_alternative<I, T>::type;
+
+  template <size_t I, class T> struct variant_alternative<I, const T>;
+  template <size_t I, class T> struct variant_alternative<I, volatile T>;
+  template <size_t I, class T> struct variant_alternative<I, const volatile T>;
+
+  template <size_t I, class... Types>
+  struct variant_alternative<I, variant<Types...>>;
+
+  constexpr size_t variant_npos = -1;
+
+  // 20.7.4, value access
+  template <class T, class... Types>
+  constexpr bool holds_alternative(const variant<Types...>&) noexcept;
+
+  template <size_t I, class... Types>
+  constexpr variant_alternative_t<I, variant<Types...>>&
+  get(variant<Types...>&);
+
+  template <size_t I, class... Types>
+  constexpr variant_alternative_t<I, variant<Types...>>&&
+  get(variant<Types...>&&);
+
+  template <size_t I, class... Types>
+  constexpr variant_alternative_t<I, variant<Types...>> const&
+  get(const variant<Types...>&);
+
+  template <size_t I, class... Types>
+  constexpr variant_alternative_t<I, variant<Types...>> const&&
+  get(const variant<Types...>&&);
+
+  template <class T, class...  Types>
+  constexpr T& get(variant<Types...>&);
+
+  template <class T, class... Types>
+  constexpr T&& get(variant<Types...>&&);
+
+  template <class T, class... Types>
+  constexpr const T& get(const variant<Types...>&);
+
+  template <class T, class... Types>
+  constexpr const T&& get(const variant<Types...>&&);
+
+  template <size_t I, class... Types>
+  constexpr add_pointer_t<variant_alternative_t<I, variant<Types...>>>
+  get_if(variant<Types...>*) noexcept;
+
+  template <size_t I, class... Types>
+  constexpr add_pointer_t<const variant_alternative_t<I, variant<Types...>>>
+  get_if(const variant<Types...>*) noexcept;
+
+  template <class T, class... Types>
+  constexpr add_pointer_t<T>
+  get_if(variant<Types...>*) noexcept;
+
+  template <class T, class... Types>
+  constexpr add_pointer_t<const T>
+  get_if(const variant<Types...>*) noexcept;
+
+  // 20.7.5, relational operators
+  template <class... Types>
+  constexpr bool operator==(const variant<Types...>&, const variant<Types...>&);
+
+  template <class... Types>
+  constexpr bool operator!=(const variant<Types...>&, const variant<Types...>&);
+
+  template <class... Types>
+  constexpr bool operator<(const variant<Types...>&, const variant<Types...>&);
+
+  template <class... Types>
+  constexpr bool operator>(const variant<Types...>&, const variant<Types...>&);
+
+  template <class... Types>
+  constexpr bool operator<=(const variant<Types...>&, const variant<Types...>&);
+
+  template <class... Types>
+  constexpr bool operator>=(const variant<Types...>&, const variant<Types...>&);
+
+  // 20.7.6, visitation
+  template <class Visitor, class... Variants>
+  constexpr see below visit(Visitor&&, Variants&&...);
+
+  // 20.7.7, class monostate
+  struct monostate;
+
+  // 20.7.8, monostate relational operators
+  constexpr bool operator<(monostate, monostate) noexcept;
+  constexpr bool operator>(monostate, monostate) noexcept;
+  constexpr bool operator<=(monostate, monostate) noexcept;
+  constexpr bool operator>=(monostate, monostate) noexcept;
+  constexpr bool operator==(monostate, monostate) noexcept;
+  constexpr bool operator!=(monostate, monostate) noexcept;
+
+  // 20.7.9, specialized algorithms
+  template <class... Types>
+  void swap(variant<Types...>&, variant<Types...>&) noexcept(see below);
+
+  // 20.7.10, class bad_variant_access
+  class bad_variant_access;
+
+  // 20.7.11, hash support
+  template <class T> struct hash;
+  template <class... Types> struct hash<variant<Types...>>;
+  template <> struct hash<monostate>;
+
+} // namespace std
+
+*/
+
+#include <cstddef>
+#include <exception>
+#include <functional>
+#include <initializer_list>
+#include <new>
+#include <type_traits>
+#include <utility>
+
+// MPark.Variant
+//
+// Copyright Michael Park, 2015-2017
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.md or copy at http://boost.org/LICENSE_1_0.txt)
+
+#ifndef XTL_MPARK_CONFIG_HPP
+#define XTL_MPARK_CONFIG_HPP
+
+// MSVC 2015 Update 3.
+#if __cplusplus < 201103L && (!defined(_MSC_VER) || _MSC_FULL_VER < 190024210)
+#error "MPark.Variant requires C++11 support."
+#endif
+
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+#ifndef __has_include
+#define __has_include(x) 0
+#endif
+
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#if __has_attribute(always_inline) || defined(__GNUC__)
+#define MPARK_ALWAYS_INLINE __attribute__((__always_inline__)) inline
+#elif defined(_MSC_VER)
+#define MPARK_ALWAYS_INLINE __forceinline
+#else
+#define MPARK_ALWAYS_INLINE inline
+#endif
+
+#if __has_builtin(__builtin_addressof) || \
+    (defined(__GNUC__) && __GNUC__ >= 7) || defined(_MSC_VER)
+#define MPARK_BUILTIN_ADDRESSOF
+#endif
+
+#if __has_builtin(__builtin_unreachable) || defined(__GNUC__)
+#define MPARK_BUILTIN_UNREACHABLE __builtin_unreachable()
+#elif defined(_MSC_VER)
+#define MPARK_BUILTIN_UNREACHABLE __assume(false)
+#else
+#define MPARK_BUILTIN_UNREACHABLE
+#endif
+
+#if __has_builtin(__type_pack_element)
+#define MPARK_TYPE_PACK_ELEMENT
+#endif
+
+#if defined(__cpp_constexpr) && __cpp_constexpr >= 200704 && \
+    !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 9)
+#define MPARK_CPP11_CONSTEXPR
+#endif
+
+#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304
+#define MPARK_CPP14_CONSTEXPR
+#endif
+
+#if !defined(MPARK_NO_EXCEPTIONS) && \
+    (__has_feature(cxx_exceptions) || defined(__cpp_exceptions) || \
+    defined(__EXCEPTIONS) || (defined(_MSC_VER) && defined(_CPPUNWIND)))
+// Exceptions are enabled.
+#define MPARK_EXCEPTIONS
+#endif
+
+#if defined(__cpp_generic_lambdas) || defined(_MSC_VER)
+#define MPARK_GENERIC_LAMBDAS
+#endif
+
+#if defined(__cpp_lib_integer_sequence)
+#define MPARK_INTEGER_SEQUENCE
+#endif
+
+#if defined(__cpp_return_type_deduction) || defined(_MSC_VER)
+#define MPARK_RETURN_TYPE_DEDUCTION
+#endif
+
+#if defined(__cpp_lib_transparent_operators) || defined(_MSC_VER)
+#define MPARK_TRANSPARENT_OPERATORS
+#endif
+
+#if defined(__cpp_variable_templates) || defined(_MSC_VER)
+#define MPARK_VARIABLE_TEMPLATES
+#endif
+
+#if !defined(__GLIBCXX__) || __has_include(<codecvt>)  // >= libstdc++-5
+#define MPARK_TRIVIALITY_TYPE_TRAITS
+#define MPARK_INCOMPLETE_TYPE_TRAITS
+#endif
+
+#endif  // XTL_MPARK_CONFIG_HPP
+
+// MPark.Variant
+//
+// Copyright Michael Park, 2015-2017
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.md or copy at http://boost.org/LICENSE_1_0.txt)
+
+#ifndef XTL_MPARK_IN_PLACE_HPP
+#define XTL_MPARK_IN_PLACE_HPP
+
+#include <cstddef>
+
+
+namespace mpark {
+
+  struct in_place_t { explicit in_place_t() = default; };
+
+  template <std::size_t I>
+  struct in_place_index_t { explicit in_place_index_t() = default; };
+
+  template <typename T>
+  struct in_place_type_t { explicit in_place_type_t() = default; };
+
+#ifdef MPARK_VARIABLE_TEMPLATES
+  constexpr in_place_t in_place{};
+
+  template <std::size_t I> constexpr in_place_index_t<I> in_place_index{};
+
+  template <typename T> constexpr in_place_type_t<T> in_place_type{};
+#endif
+
+}  // namespace mpark
+
+#endif  // XTL_MPARK_IN_PLACE_HPP
+
+// MPark.Variant
+//
+// Copyright Michael Park, 2015-2017
+//
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.md or copy at http://boost.org/LICENSE_1_0.txt)
+
+#ifndef XTL_MPARK_LIB_HPP
+#define XTL_MPARK_LIB_HPP
+
+#include <memory>
+#include <functional>
+#include <type_traits>
+#include <utility>
+
+
+#define MPARK_RETURN(...) \
+  noexcept(noexcept(__VA_ARGS__)) -> decltype(__VA_ARGS__) { return __VA_ARGS__; }
+
+namespace mpark {
+  namespace lib {
+    template <typename T>
+    struct identity { using type = T; };
+
+    inline namespace cpp14 {
+      template <typename T, std::size_t N>
+      struct array {
+        constexpr const T &operator[](std::size_t index) const {
+          return data[index];
+        }
+
+        T data[N == 0 ? 1 : N];
+      };
+
+      template <typename T>
+      using add_pointer_t = typename std::add_pointer<T>::type;
+
+      template <typename... Ts>
+      using common_type_t = typename std::common_type<Ts...>::type;
+
+      template <typename T>
+      using decay_t = typename std::decay<T>::type;
+
+      template <bool B, typename T = void>
+      using enable_if_t = typename std::enable_if<B, T>::type;
+
+      template <typename T>
+      using remove_const_t = typename std::remove_const<T>::type;
+
+      template <typename T>
+      using remove_reference_t = typename std::remove_reference<T>::type;
+
+      template <typename T>
+      inline constexpr T &&forward(remove_reference_t<T> &t) noexcept {
+        return static_cast<T &&>(t);
+      }
+
+      template <typename T>
+      inline constexpr T &&forward(remove_reference_t<T> &&t) noexcept {
+        static_assert(!std::is_lvalue_reference<T>::value,
+                      "can not forward an rvalue as an lvalue");
+        return static_cast<T &&>(t);
+      }
+
+      template <typename T>
+      inline constexpr remove_reference_t<T> &&move(T &&t) noexcept {
+        return static_cast<remove_reference_t<T> &&>(t);
+      }
+
+#ifdef MPARK_INTEGER_SEQUENCE
+      using std::integer_sequence;
+      using std::index_sequence;
+      using std::make_index_sequence;
+      using std::index_sequence_for;
+#else
+      template <typename T, T... Is>
+      struct integer_sequence {
+        using value_type = T;
+        static constexpr std::size_t size() noexcept { return sizeof...(Is); }
+      };
+
+      template <std::size_t... Is>
+      using index_sequence = integer_sequence<std::size_t, Is...>;
+
+      template <typename Lhs, typename Rhs>
+      struct make_index_sequence_concat;
+
+      template <std::size_t... Lhs, std::size_t... Rhs>
+      struct make_index_sequence_concat<index_sequence<Lhs...>,
+                                        index_sequence<Rhs...>>
+          : identity<index_sequence<Lhs..., (sizeof...(Lhs) + Rhs)...>> {};
+
+      template <std::size_t N>
+      struct make_index_sequence_impl;
+
+      template <std::size_t N>
+      using make_index_sequence = typename make_index_sequence_impl<N>::type;
+
+      template <std::size_t N>
+      struct make_index_sequence_impl
+          : make_index_sequence_concat<make_index_sequence<N / 2>,
+                                       make_index_sequence<N - (N / 2)>> {};
+
+      template <>
+      struct make_index_sequence_impl<0> : identity<index_sequence<>> {};
+
+      template <>
+      struct make_index_sequence_impl<1> : identity<index_sequence<0>> {};
+
+      template <typename... Ts>
+      using index_sequence_for = make_index_sequence<sizeof...(Ts)>;
+#endif
+
+      // <functional>
+#ifdef MPARK_TRANSPARENT_OPERATORS
+      using equal_to = std::equal_to<>;
+#else
+      struct equal_to {
+        template <typename Lhs, typename Rhs>
+        inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+          MPARK_RETURN(lib::forward<Lhs>(lhs) == lib::forward<Rhs>(rhs))
+      };
+#endif
+
+#ifdef MPARK_TRANSPARENT_OPERATORS
+      using not_equal_to = std::not_equal_to<>;
+#else
+      struct not_equal_to {
+        template <typename Lhs, typename Rhs>
+        inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+          MPARK_RETURN(lib::forward<Lhs>(lhs) != lib::forward<Rhs>(rhs))
+      };
+#endif
+
+#ifdef MPARK_TRANSPARENT_OPERATORS
+      using less = std::less<>;
+#else
+      struct less {
+        template <typename Lhs, typename Rhs>
+        inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+          MPARK_RETURN(lib::forward<Lhs>(lhs) < lib::forward<Rhs>(rhs))
+      };
+#endif
+
+#ifdef MPARK_TRANSPARENT_OPERATORS
+      using greater = std::greater<>;
+#else
+      struct greater {
+        template <typename Lhs, typename Rhs>
+        inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+          MPARK_RETURN(lib::forward<Lhs>(lhs) > lib::forward<Rhs>(rhs))
+      };
+#endif
+
+#ifdef MPARK_TRANSPARENT_OPERATORS
+      using less_equal = std::less_equal<>;
+#else
+      struct less_equal {
+        template <typename Lhs, typename Rhs>
+        inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+          MPARK_RETURN(lib::forward<Lhs>(lhs) <= lib::forward<Rhs>(rhs))
+      };
+#endif
+
+#ifdef MPARK_TRANSPARENT_OPERATORS
+      using greater_equal = std::greater_equal<>;
+#else
+      struct greater_equal {
+        template <typename Lhs, typename Rhs>
+        inline constexpr auto operator()(Lhs &&lhs, Rhs &&rhs) const
+          MPARK_RETURN(lib::forward<Lhs>(lhs) >= lib::forward<Rhs>(rhs))
+      };
+#endif
+    }  // namespace cpp14
+
+    inline namespace cpp17 {
+
+      // <type_traits>
+      template <bool B>
+      using bool_constant = std::integral_constant<bool, B>;
+
+      template <typename...>
+      struct voider : identity<void> {};
+
+      template <typename... Ts>
+      using void_t = typename voider<Ts...>::type;
+
+      namespace detail {
+        namespace swappable {
+
+          using std::swap;
+
+          template <typename T>
+          struct is_swappable {
+            private:
+            template <typename U,
+                      typename = decltype(swap(std::declval<U &>(),
+                                               std::declval<U &>()))>
+            inline static std::true_type test(int);
+
+            template <typename U>
+            inline static std::false_type test(...);
+
+            public:
+            static constexpr bool value = decltype(test<T>(0))::value;
+          };
+
+          template <bool IsSwappable, typename T>
+          struct is_nothrow_swappable {
+            static constexpr bool value =
+                noexcept(swap(std::declval<T &>(), std::declval<T &>()));
+          };
+
+          template <typename T>
+          struct is_nothrow_swappable<false, T> : std::false_type {};
+
+        }  // namespace swappable
+      }  // namespace detail
+
+      using detail::swappable::is_swappable;
+
+      template <typename T>
+      using is_nothrow_swappable =
+          detail::swappable::is_nothrow_swappable<is_swappable<T>::value, T>;
+
+      // <functional>
+      namespace detail {
+
+        template <typename T>
+        struct is_reference_wrapper : std::false_type {};
+
+        template <typename T>
+        struct is_reference_wrapper<std::reference_wrapper<T>>
+            : std::true_type {};
+
+        template <bool, int>
+        struct Invoke;
+
+        template <>
+        struct Invoke<true /* pmf */, 0 /* is_base_of */> {
+          template <typename R, typename T, typename Arg, typename... Args>
+          inline static constexpr auto invoke(R T::*pmf, Arg &&arg, Args &&... args)
+            MPARK_RETURN((lib::forward<Arg>(arg).*pmf)(lib::forward<Args>(args)...))
+        };
+
+        template <>
+        struct Invoke<true /* pmf */, 1 /* is_reference_wrapper */> {
+          template <typename R, typename T, typename Arg, typename... Args>
+          inline static constexpr auto invoke(R T::*pmf, Arg &&arg, Args &&... args)
+            MPARK_RETURN((lib::forward<Arg>(arg).get().*pmf)(lib::forward<Args>(args)...))
+        };
+
+        template <>
+        struct Invoke<true /* pmf */, 2 /* otherwise */> {
+          template <typename R, typename T, typename Arg, typename... Args>
+          inline static constexpr auto invoke(R T::*pmf, Arg &&arg, Args &&... args)
+            MPARK_RETURN(((*lib::forward<Arg>(arg)).*pmf)(lib::forward<Args>(args)...))
+        };
+
+        template <>
+        struct Invoke<false /* pmo */, 0 /* is_base_of */> {
+          template <typename R, typename T, typename Arg>
+          inline static constexpr auto invoke(R T::*pmo, Arg &&arg)
+            MPARK_RETURN(lib::forward<Arg>(arg).*pmo)
+        };
+
+        template <>
+        struct Invoke<false /* pmo */, 1 /* is_reference_wrapper */> {
+          template <typename R, typename T, typename Arg>
+          inline static constexpr auto invoke(R T::*pmo, Arg &&arg)
+            MPARK_RETURN(lib::forward<Arg>(arg).get().*pmo)
+        };
+
+        template <>
+        struct Invoke<false /* pmo */, 2 /* otherwise */> {
+          template <typename R, typename T, typename Arg>
+          inline static constexpr auto invoke(R T::*pmo, Arg &&arg)
+              MPARK_RETURN((*lib::forward<Arg>(arg)).*pmo)
+        };
+
+        template <typename R, typename T, typename Arg, typename... Args>
+        inline constexpr auto invoke(R T::*f, Arg &&arg, Args &&... args)
+          MPARK_RETURN(
+              Invoke<std::is_function<R>::value,
+                     (std::is_base_of<T, lib::decay_t<Arg>>::value
+                          ? 0
+                          : is_reference_wrapper<lib::decay_t<Arg>>::value
+                                ? 1
+                                : 2)>::invoke(f,
+                                              lib::forward<Arg>(arg),
+                                              lib::forward<Args>(args)...))
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4100)
+#endif
+        template <typename F, typename... Args>
+        inline constexpr auto invoke(F &&f, Args &&... args)
+          MPARK_RETURN(lib::forward<F>(f)(lib::forward<Args>(args)...))
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+      }  // namespace detail
+
+      template <typename F, typename... Args>
+      inline constexpr auto invoke(F &&f, Args &&... args)
+        MPARK_RETURN(detail::invoke(lib::forward<F>(f),
+                                    lib::forward<Args>(args)...))
+
+      namespace detail {
+
+        template <typename Void, typename, typename...>
+        struct invoke_result {};
+
+        template <typename F, typename... Args>
+        struct invoke_result<void_t<decltype(lib::invoke(
+                                 std::declval<F>(), std::declval<Args>()...))>,
+                             F,
+                             Args...>
+            : identity<decltype(
+                  lib::invoke(std::declval<F>(), std::declval<Args>()...))> {};
+
+      }  // namespace detail
+
+      template <typename F, typename... Args>
+      using invoke_result = detail::invoke_result<void, F, Args...>;
+
+      template <typename F, typename... Args>
+      using invoke_result_t = typename invoke_result<F, Args...>::type;
+
+      namespace detail {
+
+        template <typename Void, typename, typename...>
+        struct is_invocable : std::false_type {};
+
+        template <typename F, typename... Args>
+        struct is_invocable<void_t<invoke_result_t<F, Args...>>, F, Args...>
+            : std::true_type {};
+
+        template <typename Void, typename, typename, typename...>
+        struct is_invocable_r : std::false_type {};
+
+        template <typename R, typename F, typename... Args>
+        struct is_invocable_r<void_t<invoke_result_t<F, Args...>>,
+                              R,
+                              F,
+                              Args...>
+            : std::is_convertible<invoke_result_t<F, Args...>, R> {};
+
+      }  // namespace detail
+
+      template <typename F, typename... Args>
+      using is_invocable = detail::is_invocable<void, F, Args...>;
+
+      template <typename R, typename F, typename... Args>
+      using is_invocable_r = detail::is_invocable_r<void, R, F, Args...>;
+
+      namespace detail {
+
+        template <bool Invocable, typename F, typename... Args>
+        struct is_nothrow_invocable {
+          static constexpr bool value =
+              noexcept(lib::invoke(std::declval<F>(), std::declval<Args>()...));
+        };
+
+        template <typename F, typename... Args>
+        struct is_nothrow_invocable<false, F, Args...> : std::false_type {};
+
+        template <bool Invocable, typename R, typename F, typename... Args>
+        struct is_nothrow_invocable_r {
+          private:
+          inline static R impl() {
+            return lib::invoke(std::declval<F>(), std::declval<Args>()...);
+          }
+
+          public:
+          static constexpr bool value = noexcept(impl());
+        };
+
+        template <typename R, typename F, typename... Args>
+        struct is_nothrow_invocable_r<false, R, F, Args...> : std::false_type {};
+
+      }  // namespace detail
+
+      template <typename F, typename... Args>
+      using is_nothrow_invocable = detail::
+          is_nothrow_invocable<is_invocable<F, Args...>::value, F, Args...>;
+
+      template <typename R, typename F, typename... Args>
+      using is_nothrow_invocable_r =
+          detail::is_nothrow_invocable_r<is_invocable_r<R, F, Args...>::value,
+                                         R,
+                                         F,
+                                         Args...>;
+
+      // <memory>
+#ifdef MPARK_BUILTIN_ADDRESSOF
+      template <typename T>
+      inline constexpr T *addressof(T &arg) noexcept {
+        return __builtin_addressof(arg);
+      }
+#else
+      namespace detail {
+
+        namespace has_addressof_impl {
+
+          struct fail;
+
+          template <typename T>
+          inline fail operator&(T &&);
+
+          template <typename T>
+          inline static constexpr bool impl() {
+            return (std::is_class<T>::value || std::is_union<T>::value) &&
+                   !std::is_same<decltype(&std::declval<T &>()), fail>::value;
+          }
+
+        }  // namespace has_addressof_impl
+
+        template <typename T>
+        using has_addressof = bool_constant<has_addressof_impl::impl<T>()>;
+
+        template <typename T>
+        inline constexpr T *addressof(T &arg, std::true_type) noexcept {
+          return std::addressof(arg);
+        }
+
+        template <typename T>
+        inline constexpr T *addressof(T &arg, std::false_type) noexcept {
+          return &arg;
+        }
+
+      }  // namespace detail
+
+      template <typename T>
+      inline constexpr T *addressof(T &arg) noexcept {
+        return detail::addressof(arg, detail::has_addressof<T>{});
+      }
+#endif
+
+      template <typename T>
+      inline constexpr T *addressof(const T &&) = delete;
+
+    }  // namespace cpp17
+
+    template <typename T>
+    struct remove_all_extents : identity<T> {};
+
+    template <typename T, std::size_t N>
+    struct remove_all_extents<array<T, N>> : remove_all_extents<T> {};
+
+    template <typename T>
+    using remove_all_extents_t = typename remove_all_extents<T>::type;
+
+    template <std::size_t N>
+    using size_constant = std::integral_constant<std::size_t, N>;
+
+    template <std::size_t I, typename T>
+    struct indexed_type : size_constant<I> { using type = T; };
+
+    template <bool... Bs>
+    using all = std::is_same<integer_sequence<bool, true, Bs...>,
+                             integer_sequence<bool, Bs..., true>>;
+
+#ifdef MPARK_TYPE_PACK_ELEMENT
+    template <std::size_t I, typename... Ts>
+    using type_pack_element_t = __type_pack_element<I, Ts...>;
+#else
+    template <std::size_t I, typename... Ts>
+    struct type_pack_element_impl {
+      private:
+      template <typename>
+      struct set;
+
+      template <std::size_t... Is>
+      struct set<index_sequence<Is...>> : indexed_type<Is, Ts>... {};
+
+      template <typename T>
+      inline static std::enable_if<true, T> impl(indexed_type<I, T>);
+
+      inline static std::enable_if<false> impl(...);
+
+      public:
+      using type = decltype(impl(set<index_sequence_for<Ts...>>{}));
+    };
+
+    template <std::size_t I, typename... Ts>
+    using type_pack_element = typename type_pack_element_impl<I, Ts...>::type;
+
+    template <std::size_t I, typename... Ts>
+    using type_pack_element_t = typename type_pack_element<I, Ts...>::type;
+#endif
+
+#ifdef MPARK_TRIVIALITY_TYPE_TRAITS
+    using std::is_trivially_copy_constructible;
+    using std::is_trivially_move_constructible;
+    using std::is_trivially_copy_assignable;
+    using std::is_trivially_move_assignable;
+#else
+    template <typename T>
+    struct is_trivially_copy_constructible
+        : bool_constant<
+              std::is_copy_constructible<T>::value && __has_trivial_copy(T)> {};
+
+    template <typename T>
+    struct is_trivially_move_constructible : bool_constant<__is_trivial(T)> {};
+
+    template <typename T>
+    struct is_trivially_copy_assignable
+        : bool_constant<
+              std::is_copy_assignable<T>::value && __has_trivial_assign(T)> {};
+
+    template <typename T>
+    struct is_trivially_move_assignable : bool_constant<__is_trivial(T)> {};
+#endif
+
+    template <typename T, bool>
+    struct dependent_type : T {};
+
+    template <typename Is, std::size_t J>
+    struct push_back;
+
+    template <typename Is, std::size_t J>
+    using push_back_t = typename push_back<Is, J>::type;
+
+    template <std::size_t... Is, std::size_t J>
+    struct push_back<index_sequence<Is...>, J> {
+      using type = index_sequence<Is..., J>;
+    };
+
+  }  // namespace lib
+}  // namespace mpark
+
+#undef MPARK_RETURN
+
+#endif  // MPARK_LIB_HPP
+
+
+namespace mpark {
+
+#ifdef MPARK_RETURN_TYPE_DEDUCTION
+
+#define AUTO auto
+#define AUTO_RETURN(...) { return __VA_ARGS__; }
+
+#define AUTO_REFREF auto &&
+#define AUTO_REFREF_RETURN(...) { return __VA_ARGS__; }
+
+#define DECLTYPE_AUTO decltype(auto)
+#define DECLTYPE_AUTO_RETURN(...) { return __VA_ARGS__; }
+
+#else
+
+#define AUTO auto
+#define AUTO_RETURN(...) \
+  -> lib::decay_t<decltype(__VA_ARGS__)> { return __VA_ARGS__; }
+
+#define AUTO_REFREF auto
+#define AUTO_REFREF_RETURN(...)                                           \
+  -> decltype((__VA_ARGS__)) {                                            \
+    static_assert(std::is_reference<decltype((__VA_ARGS__))>::value, ""); \
+    return __VA_ARGS__;                                                   \
+  }
+
+#define DECLTYPE_AUTO auto
+#define DECLTYPE_AUTO_RETURN(...) \
+  -> decltype(__VA_ARGS__) { return __VA_ARGS__; }
+
+#endif
+
+  class bad_variant_access : public std::exception {
+    public:
+    virtual const char *what() const noexcept override { return "bad_variant_access"; }
+  };
+
+  [[noreturn]] inline void throw_bad_variant_access() {
+#ifdef MPARK_EXCEPTIONS
+    throw bad_variant_access{};
+#else
+    std::terminate();
+    MPARK_BUILTIN_UNREACHABLE;
+#endif
+  }
+
+  template <typename... Ts>
+  class variant;
+
+  template <typename T>
+  struct variant_size;
+
+#ifdef MPARK_VARIABLE_TEMPLATES
+  template <typename T>
+  constexpr std::size_t variant_size_v = variant_size<T>::value;
+#endif
+
+  template <typename T>
+  struct variant_size<const T> : variant_size<T> {};
+
+  template <typename T>
+  struct variant_size<volatile T> : variant_size<T> {};
+
+  template <typename T>
+  struct variant_size<const volatile T> : variant_size<T> {};
+
+  template <typename... Ts>
+  struct variant_size<variant<Ts...>> : lib::size_constant<sizeof...(Ts)> {};
+
+  template <std::size_t I, typename T>
+  struct variant_alternative;
+
+  template <std::size_t I, typename T>
+  using variant_alternative_t = typename variant_alternative<I, T>::type;
+
+  template <std::size_t I, typename T>
+  struct variant_alternative<I, const T>
+      : std::add_const<variant_alternative_t<I, T>> {};
+
+  template <std::size_t I, typename T>
+  struct variant_alternative<I, volatile T>
+      : std::add_volatile<variant_alternative_t<I, T>> {};
+
+  template <std::size_t I, typename T>
+  struct variant_alternative<I, const volatile T>
+      : std::add_cv<variant_alternative_t<I, T>> {};
+
+  template <std::size_t I, typename... Ts>
+  struct variant_alternative<I, variant<Ts...>> {
+    static_assert(I < sizeof...(Ts),
+                  "index out of bounds in `std::variant_alternative<>`");
+    using type = lib::type_pack_element_t<I, Ts...>;
+  };
+
+  constexpr std::size_t variant_npos = static_cast<std::size_t>(-1);
+
+  namespace detail {
+
+    constexpr std::size_t not_found = static_cast<std::size_t>(-1);
+    constexpr std::size_t ambiguous = static_cast<std::size_t>(-2);
+
+#ifdef MPARK_CPP14_CONSTEXPR
+    // Locate the unique index of T within Ts... with a C++14 constexpr loop.
+    // Returns not_found when T is absent, ambiguous when it occurs twice.
+    template <typename T, typename... Ts>
+    inline constexpr std::size_t find_index() {
+      constexpr lib::array<bool, sizeof...(Ts)> matches = {
+          {std::is_same<T, Ts>::value...}
+      };
+      std::size_t result = not_found;
+      for (std::size_t i = 0; i < sizeof...(Ts); ++i) {
+        if (matches[i]) {
+          if (result != not_found) {
+            return ambiguous;
+          }
+          result = i;
+        }
+      }
+      return result;
+    }
+#else
+    // C++11 fallback: the same search as single-return constexpr recursion.
+    // `result` carries the best match so far, the second argument the
+    // current position in the pack.
+    inline constexpr std::size_t find_index_impl(std::size_t result,
+                                                 std::size_t) {
+      return result;
+    }
+
+    template <typename... Bs>
+    inline constexpr std::size_t find_index_impl(std::size_t result,
+                                                 std::size_t idx,
+                                                 bool b,
+                                                 Bs... bs) {
+      return b ? (result != not_found ? ambiguous
+                                      : find_index_impl(idx, idx + 1, bs...))
+               : find_index_impl(result, idx + 1, bs...);
+    }
+
+    template <typename T, typename... Ts>
+    inline constexpr std::size_t find_index() {
+      return find_index_impl(not_found, 0, std::is_same<T, Ts>::value...);
+    }
+#endif
+
+    // SFINAE-friendly wrapper: well-formed only when T occurs exactly once
+    // in Ts..., used to constrain overloads deducing an alternative by type.
+    template <std::size_t I>
+    using find_index_sfinae_impl =
+        lib::enable_if_t<I != not_found && I != ambiguous,
+                         lib::size_constant<I>>;
+
+    template <typename T, typename... Ts>
+    using find_index_sfinae = find_index_sfinae_impl<find_index<T, Ts...>()>;
+
+    // Hard-error wrapper: static_asserts with a readable message instead of
+    // silently SFINAE-ing away when the type is missing or ambiguous.
+    template <std::size_t I>
+    struct find_index_checked_impl : lib::size_constant<I> {
+      static_assert(I != not_found, "the specified type is not found.");
+      static_assert(I != ambiguous, "the specified type is ambiguous.");
+    };
+
+    template <typename T, typename... Ts>
+    using find_index_checked = find_index_checked_impl<find_index<T, Ts...>()>;
+
+    // Tag used to construct the variant storage in the valueless state.
+    struct valueless_t {};
+
+    // Availability of a special member across alternatives, ordered from
+    // strongest to weakest guarantee; enumerator order matters (see
+    // common_trait, which takes the maximum underlying value).
+    enum class Trait { TriviallyAvailable, Available, Unavailable };
+
+    // Classify T's special member given the matching "trivially" and plain
+    // availability traits: trivially available > available > unavailable.
+    template <typename T,
+              template <typename> class IsTriviallyAvailable,
+              template <typename> class IsAvailable>
+    inline constexpr Trait trait() {
+      return IsTriviallyAvailable<T>::value
+                 ? Trait::TriviallyAvailable
+                 : IsAvailable<T>::value ? Trait::Available
+                                         : Trait::Unavailable;
+    }
+
+#ifdef MPARK_CPP14_CONSTEXPR
+    // Weakest trait (maximum enumerator value) among the arguments: a
+    // variant's special member is only as good as its worst alternative.
+    template <typename... Traits>
+    inline constexpr Trait common_trait(Traits... traits_) {
+      Trait result = Trait::TriviallyAvailable;
+      lib::array<Trait, sizeof...(Traits)> traits = {{traits_...}};
+      for (std::size_t i = 0; i < sizeof...(Traits); ++i) {
+        Trait t = traits[i];
+        if (static_cast<int>(t) > static_cast<int>(result)) {
+          result = t;
+        }
+      }
+      return result;
+    }
+#else
+    // C++11 fallback: the same max-fold as constexpr recursion.
+    inline constexpr Trait common_trait_impl(Trait result) { return result; }
+
+    template <typename... Traits>
+    inline constexpr Trait common_trait_impl(Trait result,
+                                             Trait t,
+                                             Traits... ts) {
+      return static_cast<int>(t) > static_cast<int>(result)
+                 ? common_trait_impl(t, ts...)
+                 : common_trait_impl(result, ts...);
+    }
+
+    template <typename... Traits>
+    inline constexpr Trait common_trait(Traits... ts) {
+      return common_trait_impl(Trait::TriviallyAvailable, ts...);
+    }
+#endif
+
+    // Aggregates the five special-member traits for a pack of alternatives.
+    // Assignability folds in the matching constructibility, because variant
+    // assignment may have to destroy and re-construct a new alternative.
+    template <typename... Ts>
+    struct traits {
+      static constexpr Trait copy_constructible_trait =
+          common_trait(trait<Ts,
+                             lib::is_trivially_copy_constructible,
+                             std::is_copy_constructible>()...);
+
+      static constexpr Trait move_constructible_trait =
+          common_trait(trait<Ts,
+                             lib::is_trivially_move_constructible,
+                             std::is_move_constructible>()...);
+
+      static constexpr Trait copy_assignable_trait =
+          common_trait(copy_constructible_trait,
+                       trait<Ts,
+                             lib::is_trivially_copy_assignable,
+                             std::is_copy_assignable>()...);
+
+      static constexpr Trait move_assignable_trait =
+          common_trait(move_constructible_trait,
+                       trait<Ts,
+                             lib::is_trivially_move_assignable,
+                             std::is_move_assignable>()...);
+
+      static constexpr Trait destructible_trait =
+          common_trait(trait<Ts,
+                             std::is_trivially_destructible,
+                             std::is_destructible>()...);
+    };
+
+    // Accessors that reach through the storage layers, one struct per layer:
+    // variant::impl_ -> base::data_ -> recursive_union head_/tail_.
+    namespace access {
+
+      struct recursive_union {
+#ifdef MPARK_RETURN_TYPE_DEDUCTION
+        // Walk `tail_` I times, then return the head of the remainder.
+        template <typename V>
+        inline static constexpr auto &&get_alt(V &&v, in_place_index_t<0>) {
+          return lib::forward<V>(v).head_;
+        }
+
+        template <typename V, std::size_t I>
+        inline static constexpr auto &&get_alt(V &&v, in_place_index_t<I>) {
+          return get_alt(lib::forward<V>(v).tail_, in_place_index_t<I - 1>{});
+        }
+#else
+        // Same recursion expressed as a function object so the return type
+        // can be computed via the AUTO_REFREF/decltype macros on C++11.
+        // Dummy exists only because explicit full specialization is not
+        // allowed at class scope without a remaining template parameter.
+        template <std::size_t I, bool Dummy = true>
+        struct get_alt_impl {
+          template <typename V>
+          inline constexpr AUTO_REFREF operator()(V &&v) const
+            AUTO_REFREF_RETURN(get_alt_impl<I - 1>{}(lib::forward<V>(v).tail_))
+        };
+
+        template <bool Dummy>
+        struct get_alt_impl<0, Dummy> {
+          template <typename V>
+          inline constexpr AUTO_REFREF operator()(V &&v) const
+            AUTO_REFREF_RETURN(lib::forward<V>(v).head_)
+        };
+
+        template <typename V, std::size_t I>
+        inline static constexpr AUTO_REFREF get_alt(V &&v, in_place_index_t<I>)
+          AUTO_REFREF_RETURN(get_alt_impl<I>{}(lib::forward<V>(v)))
+#endif
+      };
+
+      struct base {
+        // Fetch alt<I, T> out of a base's union. On MSVC the member is
+        // accessed directly; elsewhere it goes through the friend `data`
+        // function (NOTE(review): presumably an MSVC name-lookup
+        // workaround — behavior is the same either way).
+        template <std::size_t I, typename V>
+        inline static constexpr AUTO_REFREF get_alt(V &&v)
+#ifdef _MSC_VER
+          AUTO_REFREF_RETURN(recursive_union::get_alt(
+              lib::forward<V>(v).data_, in_place_index_t<I>{}))
+#else
+          AUTO_REFREF_RETURN(recursive_union::get_alt(
+              data(lib::forward<V>(v)), in_place_index_t<I>{}))
+#endif
+      };
+
+      struct variant {
+        // Unwrap the variant's impl_ layer and delegate downward.
+        template <std::size_t I, typename V>
+        inline static constexpr AUTO_REFREF get_alt(V &&v)
+          AUTO_REFREF_RETURN(base::get_alt<I>(lib::forward<V>(v).impl_))
+      };
+
+    }  // namespace access
+
+    namespace visitation {
+
+// Prefer the switch-based visitation strategy when C++14 constexpr is
+// available (and not on MSVC, which takes the table-based path below).
+#if defined(MPARK_CPP14_CONSTEXPR) && !defined(_MSC_VER)
+#define MPARK_VARIANT_SWITCH_VISIT
+#endif
+
+      // Core visitation machinery, operating on the storage `base` layer.
+      struct base {
+        // Result of invoking Visitor with the 0th alternative of each
+        // variant; `visit` requires every index combination to agree.
+        template <typename Visitor, typename... Vs>
+        using dispatch_result_t = decltype(
+            lib::invoke(std::declval<Visitor>(),
+                        access::base::get_alt<0>(std::declval<Vs>())...));
+
+        // expected/but_got exist purely so a failed return-type check reads
+        // as `expected<R>::but_got<Actual>()` in the compiler diagnostic.
+        template <typename Expected>
+        struct expected {
+          template <typename Actual>
+          inline static constexpr bool but_got() {
+            return std::is_same<Expected, Actual>::value;
+          }
+        };
+
+        // Asserts the visitor returns the same type for this combination of
+        // alternatives, then performs the actual invocation.
+        template <typename Expected, typename Actual>
+        struct visit_return_type_check {
+          static_assert(
+              expected<Expected>::template but_got<Actual>(),
+              "`visit` requires the visitor to have a single return type");
+
+          template <typename Visitor, typename... Alts>
+          inline static constexpr DECLTYPE_AUTO invoke(Visitor &&visitor,
+                                                       Alts &&... alts)
+            DECLTYPE_AUTO_RETURN(lib::invoke(lib::forward<Visitor>(visitor),
+                                             lib::forward<Alts>(alts)...))
+        };
+
+#ifdef MPARK_VARIANT_SWITCH_VISIT
+        // Switch-based strategy: peel off one variant at a time, switching
+        // over its runtime index in 32-case chunks (the default case chains
+        // to the next chunk at B + 32). ITs accumulates already-visited
+        // variants as indexed types. The B == false specialization is
+        // instantiated for the out-of-range cases the fixed 32-way expansion
+        // produces and must be unreachable at runtime.
+        template <bool B, typename R, typename... ITs>
+        struct dispatcher;
+
+        template <typename R, typename... ITs>
+        struct dispatcher<false, R, ITs...> {
+          template <std::size_t B, typename F, typename... Vs>
+          MPARK_ALWAYS_INLINE static constexpr R dispatch(
+              F &&, typename ITs::type &&..., Vs &&...) {
+            MPARK_BUILTIN_UNREACHABLE;
+          }
+
+          template <std::size_t I, typename F, typename... Vs>
+          MPARK_ALWAYS_INLINE static constexpr R dispatch_case(F &&, Vs &&...) {
+            MPARK_BUILTIN_UNREACHABLE;
+          }
+
+          template <std::size_t B, typename F, typename... Vs>
+          MPARK_ALWAYS_INLINE static constexpr R dispatch_at(std::size_t,
+                                                             F &&,
+                                                             Vs &&...) {
+            MPARK_BUILTIN_UNREACHABLE;
+          }
+        };
+
+        template <typename R, typename... ITs>
+        struct dispatcher<true, R, ITs...> {
+          // Base case: every variant's index is now encoded in ITs; invoke
+          // the visitor with the selected alternatives.
+          template <std::size_t B, typename F>
+          MPARK_ALWAYS_INLINE static constexpr R dispatch(
+              F &&f, typename ITs::type &&... visited_vs) {
+            using Expected = R;
+            using Actual = decltype(lib::invoke(
+                lib::forward<F>(f),
+                access::base::get_alt<ITs::value>(
+                    lib::forward<typename ITs::type>(visited_vs))...));
+            return visit_return_type_check<Expected, Actual>::invoke(
+                lib::forward<F>(f),
+                access::base::get_alt<ITs::value>(
+                    lib::forward<typename ITs::type>(visited_vs))...);
+          }
+
+          // Recursive case: switch on v's index, record it as an indexed
+          // type, and recurse on the remaining variants vs...
+          template <std::size_t B, typename F, typename V, typename... Vs>
+          MPARK_ALWAYS_INLINE static constexpr R dispatch(
+              F &&f, typename ITs::type &&... visited_vs, V &&v, Vs &&... vs) {
+#define MPARK_DISPATCH(I)                                                   \
+  dispatcher<(I < lib::decay_t<V>::size()),                                 \
+             R,                                                             \
+             ITs...,                                                        \
+             lib::indexed_type<I, V>>::                                     \
+      template dispatch<0>(lib::forward<F>(f),                              \
+                           lib::forward<typename ITs::type>(visited_vs)..., \
+                           lib::forward<V>(v),                              \
+                           lib::forward<Vs>(vs)...)
+
+#define MPARK_DEFAULT(I)                                                      \
+  dispatcher<(I < lib::decay_t<V>::size()), R, ITs...>::template dispatch<I>( \
+      lib::forward<F>(f),                                                     \
+      lib::forward<typename ITs::type>(visited_vs)...,                        \
+      lib::forward<V>(v),                                                     \
+      lib::forward<Vs>(vs)...)
+
+            switch (v.index()) {
+              case B + 0: return MPARK_DISPATCH(B + 0);
+              case B + 1: return MPARK_DISPATCH(B + 1);
+              case B + 2: return MPARK_DISPATCH(B + 2);
+              case B + 3: return MPARK_DISPATCH(B + 3);
+              case B + 4: return MPARK_DISPATCH(B + 4);
+              case B + 5: return MPARK_DISPATCH(B + 5);
+              case B + 6: return MPARK_DISPATCH(B + 6);
+              case B + 7: return MPARK_DISPATCH(B + 7);
+              case B + 8: return MPARK_DISPATCH(B + 8);
+              case B + 9: return MPARK_DISPATCH(B + 9);
+              case B + 10: return MPARK_DISPATCH(B + 10);
+              case B + 11: return MPARK_DISPATCH(B + 11);
+              case B + 12: return MPARK_DISPATCH(B + 12);
+              case B + 13: return MPARK_DISPATCH(B + 13);
+              case B + 14: return MPARK_DISPATCH(B + 14);
+              case B + 15: return MPARK_DISPATCH(B + 15);
+              case B + 16: return MPARK_DISPATCH(B + 16);
+              case B + 17: return MPARK_DISPATCH(B + 17);
+              case B + 18: return MPARK_DISPATCH(B + 18);
+              case B + 19: return MPARK_DISPATCH(B + 19);
+              case B + 20: return MPARK_DISPATCH(B + 20);
+              case B + 21: return MPARK_DISPATCH(B + 21);
+              case B + 22: return MPARK_DISPATCH(B + 22);
+              case B + 23: return MPARK_DISPATCH(B + 23);
+              case B + 24: return MPARK_DISPATCH(B + 24);
+              case B + 25: return MPARK_DISPATCH(B + 25);
+              case B + 26: return MPARK_DISPATCH(B + 26);
+              case B + 27: return MPARK_DISPATCH(B + 27);
+              case B + 28: return MPARK_DISPATCH(B + 28);
+              case B + 29: return MPARK_DISPATCH(B + 29);
+              case B + 30: return MPARK_DISPATCH(B + 30);
+              case B + 31: return MPARK_DISPATCH(B + 31);
+              default: return MPARK_DEFAULT(B + 32);
+            }
+
+#undef MPARK_DEFAULT
+#undef MPARK_DISPATCH
+          }
+
+          // Invoke the visitor with alternative I of every variant (used by
+          // dispatch_at, where all variants share one index).
+          template <std::size_t I, typename F, typename... Vs>
+          MPARK_ALWAYS_INLINE static constexpr R dispatch_case(F &&f,
+                                                               Vs &&... vs) {
+            using Expected = R;
+            using Actual = decltype(
+                lib::invoke(lib::forward<F>(f),
+                            access::base::get_alt<I>(lib::forward<Vs>(vs))...));
+            return visit_return_type_check<Expected, Actual>::invoke(
+                lib::forward<F>(f),
+                access::base::get_alt<I>(lib::forward<Vs>(vs))...);
+          }
+
+          // Single-index visitation: switch on one runtime index shared by
+          // all variants, again in 32-case chunks.
+          template <std::size_t B, typename F, typename V, typename... Vs>
+          MPARK_ALWAYS_INLINE static constexpr R dispatch_at(std::size_t index,
+                                                             F &&f,
+                                                             V &&v,
+                                                             Vs &&... vs) {
+            static_assert(lib::all<(lib::decay_t<V>::size() ==
+                                    lib::decay_t<Vs>::size())...>::value,
+                          "all of the variants must be the same size.");
+#define MPARK_DISPATCH_AT(I)                                               \
+  dispatcher<(I < lib::decay_t<V>::size()), R>::template dispatch_case<I>( \
+      lib::forward<F>(f), lib::forward<V>(v), lib::forward<Vs>(vs)...)
+
+#define MPARK_DEFAULT(I)                                                 \
+  dispatcher<(I < lib::decay_t<V>::size()), R>::template dispatch_at<I>( \
+      index, lib::forward<F>(f), lib::forward<V>(v), lib::forward<Vs>(vs)...)
+
+            switch (index) {
+              case B + 0: return MPARK_DISPATCH_AT(B + 0);
+              case B + 1: return MPARK_DISPATCH_AT(B + 1);
+              case B + 2: return MPARK_DISPATCH_AT(B + 2);
+              case B + 3: return MPARK_DISPATCH_AT(B + 3);
+              case B + 4: return MPARK_DISPATCH_AT(B + 4);
+              case B + 5: return MPARK_DISPATCH_AT(B + 5);
+              case B + 6: return MPARK_DISPATCH_AT(B + 6);
+              case B + 7: return MPARK_DISPATCH_AT(B + 7);
+              case B + 8: return MPARK_DISPATCH_AT(B + 8);
+              case B + 9: return MPARK_DISPATCH_AT(B + 9);
+              case B + 10: return MPARK_DISPATCH_AT(B + 10);
+              case B + 11: return MPARK_DISPATCH_AT(B + 11);
+              case B + 12: return MPARK_DISPATCH_AT(B + 12);
+              case B + 13: return MPARK_DISPATCH_AT(B + 13);
+              case B + 14: return MPARK_DISPATCH_AT(B + 14);
+              case B + 15: return MPARK_DISPATCH_AT(B + 15);
+              case B + 16: return MPARK_DISPATCH_AT(B + 16);
+              case B + 17: return MPARK_DISPATCH_AT(B + 17);
+              case B + 18: return MPARK_DISPATCH_AT(B + 18);
+              case B + 19: return MPARK_DISPATCH_AT(B + 19);
+              case B + 20: return MPARK_DISPATCH_AT(B + 20);
+              case B + 21: return MPARK_DISPATCH_AT(B + 21);
+              case B + 22: return MPARK_DISPATCH_AT(B + 22);
+              case B + 23: return MPARK_DISPATCH_AT(B + 23);
+              case B + 24: return MPARK_DISPATCH_AT(B + 24);
+              case B + 25: return MPARK_DISPATCH_AT(B + 25);
+              case B + 26: return MPARK_DISPATCH_AT(B + 26);
+              case B + 27: return MPARK_DISPATCH_AT(B + 27);
+              case B + 28: return MPARK_DISPATCH_AT(B + 28);
+              case B + 29: return MPARK_DISPATCH_AT(B + 29);
+              case B + 30: return MPARK_DISPATCH_AT(B + 30);
+              case B + 31: return MPARK_DISPATCH_AT(B + 31);
+              default: return MPARK_DEFAULT(B + 32);
+            }
+
+#undef MPARK_DEFAULT
+#undef MPARK_DISPATCH_AT
+          }
+        };
+#else
+        // Table-based strategy: build a constexpr N-dimensional array
+        // ("fmatrix") of dispatch function pointers, one entry per
+        // combination of alternative indices, then index it with the
+        // runtime indices.
+        template <typename T>
+        inline static constexpr const T &at(const T &elem) noexcept {
+          return elem;
+        }
+
+        // Multi-dimensional indexing into the nested constexpr array.
+        template <typename T, std::size_t N, typename... Is>
+        inline static constexpr const lib::remove_all_extents_t<T> &at(
+            const lib::array<T, N> &elems, std::size_t i, Is... is) noexcept {
+          return at(elems[i], is...);
+        }
+
+        template <typename F, typename... Fs>
+        inline static constexpr lib::array<lib::decay_t<F>, sizeof...(Fs) + 1>
+        make_farray(F &&f, Fs &&... fs) {
+          return {{lib::forward<F>(f), lib::forward<Fs>(fs)...}};
+        }
+
+        // Builds the full cartesian-product table: `impl` recurses over the
+        // index sequences of each variant, pushing one chosen index per
+        // level, and emits &dispatch<Is...> at the leaves.
+        template <typename F, typename... Vs>
+        struct make_fmatrix_impl {
+
+          template <std::size_t... Is>
+          inline static constexpr dispatch_result_t<F, Vs...> dispatch(
+              F &&f, Vs &&... vs) {
+            using Expected = dispatch_result_t<F, Vs...>;
+            using Actual = decltype(lib::invoke(
+                lib::forward<F>(f),
+                access::base::get_alt<Is>(lib::forward<Vs>(vs))...));
+            return visit_return_type_check<Expected, Actual>::invoke(
+                lib::forward<F>(f),
+                access::base::get_alt<Is>(lib::forward<Vs>(vs))...);
+          }
+
+#ifdef MPARK_RETURN_TYPE_DEDUCTION
+          template <std::size_t... Is>
+          inline static constexpr auto impl(lib::index_sequence<Is...>) {
+            return &dispatch<Is...>;
+          }
+
+          template <typename Is, std::size_t... Js, typename... Ls>
+          inline static constexpr auto impl(Is,
+                                            lib::index_sequence<Js...>,
+                                            Ls... ls) {
+            return make_farray(impl(lib::push_back_t<Is, Js>{}, ls...)...);
+          }
+#else
+          // C++11 spelling of the same recursion as function objects.
+          template <typename...>
+          struct impl;
+
+          template <std::size_t... Is>
+          struct impl<lib::index_sequence<Is...>> {
+            inline constexpr AUTO operator()() const
+              AUTO_RETURN(&dispatch<Is...>)
+          };
+
+          template <typename Is, std::size_t... Js, typename... Ls>
+          struct impl<Is, lib::index_sequence<Js...>, Ls...> {
+            inline constexpr AUTO operator()() const
+              AUTO_RETURN(
+                  make_farray(impl<lib::push_back_t<Is, Js>, Ls...>{}()...))
+          };
+#endif
+        };
+
+#ifdef MPARK_RETURN_TYPE_DEDUCTION
+        template <typename F, typename... Vs>
+        inline static constexpr auto make_fmatrix() {
+          return make_fmatrix_impl<F, Vs...>::impl(
+              lib::index_sequence<>{},
+              lib::make_index_sequence<lib::decay_t<Vs>::size()>{}...);
+        }
+#else
+        template <typename F, typename... Vs>
+        inline static constexpr AUTO make_fmatrix()
+          AUTO_RETURN(
+              typename make_fmatrix_impl<F, Vs...>::template impl<
+                  lib::index_sequence<>,
+                  lib::make_index_sequence<lib::decay_t<Vs>::size()>...>{}())
+#endif
+
+        // "fdiagonal": 1-D dispatch table for visiting all variants at the
+        // same alternative index (used by visit_alt_at).
+        template <typename F, typename... Vs>
+        struct make_fdiagonal_impl {
+          template <std::size_t I>
+          inline static constexpr dispatch_result_t<F, Vs...> dispatch(
+              F &&f, Vs &&... vs) {
+            using Expected = dispatch_result_t<F, Vs...>;
+            using Actual = decltype(
+                lib::invoke(lib::forward<F>(f),
+                            access::base::get_alt<I>(lib::forward<Vs>(vs))...));
+            return visit_return_type_check<Expected, Actual>::invoke(
+                lib::forward<F>(f),
+                access::base::get_alt<I>(lib::forward<Vs>(vs))...);
+          }
+
+          template <std::size_t... Is>
+          inline static constexpr AUTO impl(lib::index_sequence<Is...>)
+            AUTO_RETURN(make_farray(&dispatch<Is>...))
+        };
+
+        template <typename F, typename V, typename... Vs>
+        inline static constexpr auto make_fdiagonal()
+            -> decltype(make_fdiagonal_impl<F, V, Vs...>::impl(
+                lib::make_index_sequence<lib::decay_t<V>::size()>{})) {
+          static_assert(lib::all<(lib::decay_t<V>::size() ==
+                                  lib::decay_t<Vs>::size())...>::value,
+                        "all of the variants must be the same size.");
+          return make_fdiagonal_impl<F, V, Vs...>::impl(
+              lib::make_index_sequence<lib::decay_t<V>::size()>{});
+        }
+#endif
+      };
+
+// Out-of-line static storage for the constexpr dispatch tables, so each
+// (F, Vs...) combination materializes its table once. Old MSVC (< 1910)
+// cannot handle these and rebuilds the tables at each visit call instead.
+#if !defined(MPARK_VARIANT_SWITCH_VISIT) && \
+    (!defined(_MSC_VER) || _MSC_VER >= 1910)
+      template <typename F, typename... Vs>
+      using fmatrix_t = decltype(base::make_fmatrix<F, Vs...>());
+
+      template <typename F, typename... Vs>
+      struct fmatrix {
+        static constexpr fmatrix_t<F, Vs...> value =
+            base::make_fmatrix<F, Vs...>();
+      };
+
+      // Out-of-class definition required for ODR-use in C++11/14.
+      template <typename F, typename... Vs>
+      constexpr fmatrix_t<F, Vs...> fmatrix<F, Vs...>::value;
+
+      template <typename F, typename... Vs>
+      using fdiagonal_t = decltype(base::make_fdiagonal<F, Vs...>());
+
+      template <typename F, typename... Vs>
+      struct fdiagonal {
+        static constexpr fdiagonal_t<F, Vs...> value =
+            base::make_fdiagonal<F, Vs...>();
+      };
+
+      template <typename F, typename... Vs>
+      constexpr fdiagonal_t<F, Vs...> fdiagonal<F, Vs...>::value;
+#endif
+
+      // Visitation over the storage layer: strips each variant down to its
+      // `base` (via the friend as_base) and dispatches using whichever
+      // strategy was selected above (switch / cached table / per-call table).
+      struct alt {
+        // Visit the active alternative of every variant.
+        template <typename Visitor, typename... Vs>
+        inline static constexpr DECLTYPE_AUTO visit_alt(Visitor &&visitor,
+                                                        Vs &&... vs)
+#ifdef MPARK_VARIANT_SWITCH_VISIT
+          DECLTYPE_AUTO_RETURN(
+              base::dispatcher<
+                  true,
+                  base::dispatch_result_t<Visitor,
+                                          decltype(as_base(
+                                              lib::forward<Vs>(vs)))...>>::
+                  template dispatch<0>(lib::forward<Visitor>(visitor),
+                                       as_base(lib::forward<Vs>(vs))...))
+#elif !defined(_MSC_VER) || _MSC_VER >= 1910
+          DECLTYPE_AUTO_RETURN(base::at(
+              fmatrix<Visitor &&,
+                      decltype(as_base(lib::forward<Vs>(vs)))...>::value,
+              vs.index()...)(lib::forward<Visitor>(visitor),
+                             as_base(lib::forward<Vs>(vs))...))
+#else
+          DECLTYPE_AUTO_RETURN(base::at(
+              base::make_fmatrix<Visitor &&,
+                      decltype(as_base(lib::forward<Vs>(vs)))...>(),
+              vs.index()...)(lib::forward<Visitor>(visitor),
+                             as_base(lib::forward<Vs>(vs))...))
+#endif
+
+        // Visit alternative `index` of every variant (all variants must have
+        // the same number of alternatives).
+        template <typename Visitor, typename... Vs>
+        inline static constexpr DECLTYPE_AUTO visit_alt_at(std::size_t index,
+                                                           Visitor &&visitor,
+                                                           Vs &&... vs)
+#ifdef MPARK_VARIANT_SWITCH_VISIT
+          DECLTYPE_AUTO_RETURN(
+              base::dispatcher<
+                  true,
+                  base::dispatch_result_t<Visitor,
+                                          decltype(as_base(
+                                              lib::forward<Vs>(vs)))...>>::
+                  template dispatch_at<0>(index,
+                                          lib::forward<Visitor>(visitor),
+                                          as_base(lib::forward<Vs>(vs))...))
+#elif !defined(_MSC_VER) || _MSC_VER >= 1910
+          DECLTYPE_AUTO_RETURN(base::at(
+              fdiagonal<Visitor &&,
+                        decltype(as_base(lib::forward<Vs>(vs)))...>::value,
+              index)(lib::forward<Visitor>(visitor),
+                     as_base(lib::forward<Vs>(vs))...))
+#else
+          DECLTYPE_AUTO_RETURN(base::at(
+              base::make_fdiagonal<Visitor &&,
+                        decltype(as_base(lib::forward<Vs>(vs)))...>(),
+              index)(lib::forward<Visitor>(visitor),
+                     as_base(lib::forward<Vs>(vs))...))
+#endif
+      };
+
+      // User-facing visitation layer: unwraps variant::impl_, statically
+      // checks the visitor handles every combination, and passes the
+      // alternative *values* (not the alt<> wrappers) to the visitor.
+      struct variant {
+        private:
+        // Named so a failed exhaustiveness static_assert mentions
+        // `visitor<Visitor>::does_not_handle<Values...>` in the diagnostic;
+        // it actually returns true when the visitor IS invocable.
+        template <typename Visitor>
+        struct visitor {
+          template <typename... Values>
+          inline static constexpr bool does_not_handle() {
+            return lib::is_invocable<Visitor, Values...>::value;
+          }
+        };
+
+        template <typename Visitor, typename... Values>
+        struct visit_exhaustiveness_check {
+          static_assert(visitor<Visitor>::template does_not_handle<Values...>(),
+                        "`visit` requires the visitor to be exhaustive.");
+
+          inline static constexpr DECLTYPE_AUTO invoke(Visitor &&visitor,
+                                                       Values &&... values)
+            DECLTYPE_AUTO_RETURN(lib::invoke(lib::forward<Visitor>(visitor),
+                                             lib::forward<Values>(values)...))
+        };
+
+        // Adapter turning an alt-visitor into a value-visitor: forwards
+        // each alt's `.value` member to the user's visitor.
+        template <typename Visitor>
+        struct value_visitor {
+          Visitor &&visitor_;
+
+          template <typename... Alts>
+          inline constexpr DECLTYPE_AUTO operator()(Alts &&... alts) const
+            DECLTYPE_AUTO_RETURN(
+                visit_exhaustiveness_check<
+                    Visitor,
+                    decltype((lib::forward<Alts>(alts).value))...>::
+                    invoke(lib::forward<Visitor>(visitor_),
+                           lib::forward<Alts>(alts).value...))
+        };
+
+        template <typename Visitor>
+        inline static constexpr AUTO make_value_visitor(Visitor &&visitor)
+          AUTO_RETURN(value_visitor<Visitor>{lib::forward<Visitor>(visitor)})
+
+        public:
+        template <typename Visitor, typename... Vs>
+        inline static constexpr DECLTYPE_AUTO visit_alt(Visitor &&visitor,
+                                                        Vs &&... vs)
+          DECLTYPE_AUTO_RETURN(alt::visit_alt(lib::forward<Visitor>(visitor),
+                                              lib::forward<Vs>(vs).impl_...))
+
+        template <typename Visitor, typename... Vs>
+        inline static constexpr DECLTYPE_AUTO visit_alt_at(std::size_t index,
+                                                           Visitor &&visitor,
+                                                           Vs &&... vs)
+          DECLTYPE_AUTO_RETURN(
+              alt::visit_alt_at(index,
+                                lib::forward<Visitor>(visitor),
+                                lib::forward<Vs>(vs).impl_...))
+
+        // Value-level visitation: what std::visit is built on.
+        template <typename Visitor, typename... Vs>
+        inline static constexpr DECLTYPE_AUTO visit_value(Visitor &&visitor,
+                                                          Vs &&... vs)
+          DECLTYPE_AUTO_RETURN(
+              visit_alt(make_value_visitor(lib::forward<Visitor>(visitor)),
+                        lib::forward<Vs>(vs)...))
+
+        template <typename Visitor, typename... Vs>
+        inline static constexpr DECLTYPE_AUTO visit_value_at(std::size_t index,
+                                                             Visitor &&visitor,
+                                                             Vs &&... vs)
+          DECLTYPE_AUTO_RETURN(
+              visit_alt_at(index,
+                           make_value_visitor(lib::forward<Visitor>(visitor)),
+                           lib::forward<Vs>(vs)...))
+      };
+
+    }  // namespace visitation
+
+    // Storage wrapper for a single alternative; Index tags its position in
+    // the pack. The MSVC pragma silences conversion warnings (C4244) that
+    // can arise from forwarding the constructor arguments into T.
+    template <std::size_t Index, typename T>
+    struct alt {
+      using value_type = T;
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4244)
+#endif
+      template <typename... Args>
+      inline explicit constexpr alt(in_place_t, Args &&... args)
+          : value(lib::forward<Args>(args)...) {}
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+      T value;
+    };
+
+    // Recursive union holding one alt<Index, T> per alternative, specialized
+    // on destructor availability so triviality is preserved when possible.
+    template <Trait DestructibleTrait, std::size_t Index, typename... Ts>
+    union recursive_union;
+
+    // Empty pack: terminates the head_/tail_ recursion.
+    template <Trait DestructibleTrait, std::size_t Index>
+    union recursive_union<DestructibleTrait, Index> {};
+
+// Stamps out the three recursive_union specializations, identical except
+// for the destructor: defaulted (trivial), empty body (non-trivial), or
+// deleted (non-destructible alternatives). `dummy_` makes the valueless
+// state constexpr-constructible without touching any alternative.
+#define MPARK_VARIANT_RECURSIVE_UNION(destructible_trait, destructor)      \
+  template <std::size_t Index, typename T, typename... Ts>                 \
+  union recursive_union<destructible_trait, Index, T, Ts...> {             \
+    public:                                                                \
+    inline explicit constexpr recursive_union(valueless_t) noexcept        \
+        : dummy_{} {}                                                      \
+                                                                           \
+    template <typename... Args>                                            \
+    inline explicit constexpr recursive_union(in_place_index_t<0>,         \
+                                              Args &&... args)             \
+        : head_(in_place_t{}, lib::forward<Args>(args)...) {}              \
+                                                                           \
+    template <std::size_t I, typename... Args>                             \
+    inline explicit constexpr recursive_union(in_place_index_t<I>,         \
+                                              Args &&... args)             \
+        : tail_(in_place_index_t<I - 1>{}, lib::forward<Args>(args)...) {} \
+                                                                           \
+    recursive_union(const recursive_union &) = default;                    \
+    recursive_union(recursive_union &&) = default;                         \
+                                                                           \
+    destructor                                                             \
+                                                                           \
+    recursive_union &operator=(const recursive_union &) = default;         \
+    recursive_union &operator=(recursive_union &&) = default;              \
+                                                                           \
+    private:                                                               \
+    char dummy_;                                                           \
+    alt<Index, T> head_;                                                   \
+    recursive_union<destructible_trait, Index + 1, Ts...> tail_;           \
+                                                                           \
+    friend struct access::recursive_union;                                 \
+  }
+
+    MPARK_VARIANT_RECURSIVE_UNION(Trait::TriviallyAvailable,
+                                  ~recursive_union() = default;);
+    MPARK_VARIANT_RECURSIVE_UNION(Trait::Available,
+                                  ~recursive_union() {});
+    MPARK_VARIANT_RECURSIVE_UNION(Trait::Unavailable,
+                                  ~recursive_union() = delete;);
+
+#undef MPARK_VARIANT_RECURSIVE_UNION
+
+    // Discriminator type; index_t(-1) marks the valueless state.
+    using index_t = unsigned int;
+
+    // Discriminated-storage layer: the recursive union plus the index of
+    // the active alternative. The hidden friend `as_base` / `data`
+    // functions give the visitation machinery access to this layer without
+    // exposing public accessors.
+    template <Trait DestructibleTrait, typename... Ts>
+    class base {
+      public:
+      inline explicit constexpr base(valueless_t tag) noexcept
+          : data_(tag), index_(static_cast<index_t>(-1)) {}
+
+      template <std::size_t I, typename... Args>
+      inline explicit constexpr base(in_place_index_t<I>, Args &&... args)
+          : data_(in_place_index_t<I>{}, lib::forward<Args>(args)...),
+            index_(I) {}
+
+      inline constexpr bool valueless_by_exception() const noexcept {
+        return index_ == static_cast<index_t>(-1);
+      }
+
+      // Translates the internal sentinel into the public variant_npos.
+      inline constexpr std::size_t index() const noexcept {
+        return valueless_by_exception() ? variant_npos : index_;
+      }
+
+      protected:
+      using data_t = recursive_union<DestructibleTrait, 0, Ts...>;
+
+      friend inline constexpr base &as_base(base &b) { return b; }
+      friend inline constexpr const base &as_base(const base &b) { return b; }
+      friend inline constexpr base &&as_base(base &&b) { return lib::move(b); }
+      friend inline constexpr const base &&as_base(const base &&b) { return lib::move(b); }
+
+      friend inline constexpr data_t &data(base &b) { return b.data_; }
+      friend inline constexpr const data_t &data(const base &b) { return b.data_; }
+      friend inline constexpr data_t &&data(base &&b) { return lib::move(b).data_; }
+      friend inline constexpr const data_t &&data(const base &&b) { return lib::move(b).data_; }
+
+      inline static constexpr std::size_t size() { return sizeof...(Ts); }
+
+      data_t data_;
+      index_t index_;
+
+      friend struct access::base;
+      friend struct visitation::base;
+    };
+
+    // Visitor that destroys the currently active alternative; used by the
+    // non-trivial `destructor` specialization below via visit_alt.
+    struct dtor {
+#ifdef _MSC_VER
+// MSVC flags `alt` as an unreferenced formal parameter (C4100) because the
+// destructor call does not count as a use; suppress locally.
+#pragma warning(push)
+#pragma warning(disable : 4100)
+#endif
+      template <typename Alt>
+      inline void operator()(Alt &alt) const noexcept { alt.~Alt(); }
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+    };
+
+// Pull the base class's constructors into a derived layer. MSVC before
+// 19.10 (VS 2017) has unreliable inheriting-constructor support, so a
+// perfect-forwarding constructor template is emitted there instead of
+// `using base::base;`.
+#if !defined(_MSC_VER) || _MSC_VER >= 1910
+#define MPARK_INHERITING_CTOR(type, base) using base::base;
+#else
+#define MPARK_INHERITING_CTOR(type, base)         \
+  template <typename... Args>                     \
+  inline explicit constexpr type(Args &&... args) \
+      : base(lib::forward<Args>(args)...) {}
+#endif
+
+    // Destructor layer: specialized on the variant's destructibility trait
+    // via the macro below. `definition` supplies the class's destructor and
+    // `destroy` supplies the protected destroy() member used by the
+    // construction/assignment layers to tear down the active alternative.
+    template <typename Traits, Trait = Traits::destructible_trait>
+    class destructor;
+
+#define MPARK_VARIANT_DESTRUCTOR(destructible_trait, definition, destroy) \
+  template <typename... Ts>                                               \
+  class destructor<traits<Ts...>, destructible_trait>                     \
+      : public base<destructible_trait, Ts...> {                          \
+    using super = base<destructible_trait, Ts...>;                        \
+                                                                          \
+    public:                                                               \
+    MPARK_INHERITING_CTOR(destructor, super)                              \
+    using super::operator=;                                               \
+                                                                          \
+    destructor(const destructor &) = default;                             \
+    destructor(destructor &&) = default;                                  \
+    definition                                                            \
+    destructor &operator=(const destructor &) = default;                  \
+    destructor &operator=(destructor &&) = default;                       \
+                                                                          \
+    protected:                                                            \
+    destroy                                                               \
+  }
+
+    // Trivial destruction: destroy() only resets the index to valueless.
+    MPARK_VARIANT_DESTRUCTOR(
+        Trait::TriviallyAvailable,
+        ~destructor() = default;,
+        inline void destroy() noexcept {
+          this->index_ = static_cast<index_t>(-1);
+        });
+
+    // Non-trivial destruction: visit the active alternative and run its
+    // destructor, then mark the variant valueless.
+    MPARK_VARIANT_DESTRUCTOR(
+        Trait::Available,
+        ~destructor() { destroy(); },
+        inline void destroy() noexcept {
+          if (!this->valueless_by_exception()) {
+            visitation::alt::visit_alt(dtor{}, *this);
+          }
+          this->index_ = static_cast<index_t>(-1);
+        });
+
+    // Non-destructible alternatives: both operations are deleted.
+    MPARK_VARIANT_DESTRUCTOR(
+        Trait::Unavailable,
+        ~destructor() = delete;,
+        inline void destroy() noexcept = delete;);
+
+#undef MPARK_VARIANT_DESTRUCTOR
+
+    // Construction layer: provides the protected helpers used by the
+    // copy/move-constructor layers to (re)construct an alternative in place.
+    template <typename Traits>
+    class constructor : public destructor<Traits> {
+      using super = destructor<Traits>;
+
+      public:
+      MPARK_INHERITING_CTOR(constructor, super)
+      using super::operator=;
+
+      protected:
+#ifndef MPARK_GENERIC_LAMBDAS
+      // Fallback visitor for compilers without generic lambdas: constructs
+      // the lhs alternative from the rhs alternative's value.
+      struct ctor {
+        template <typename LhsAlt, typename RhsAlt>
+        inline void operator()(LhsAlt &lhs_alt, RhsAlt &&rhs_alt) const {
+          constructor::construct_alt(lhs_alt,
+                                     lib::forward<RhsAlt>(rhs_alt).value);
+        }
+      };
+#endif
+
+      // Placement-new the alternative `a` from `args` and return a
+      // reference to the constructed value. The caller is responsible for
+      // having destroyed any previously active alternative.
+      template <std::size_t I, typename T, typename... Args>
+      inline static T &construct_alt(alt<I, T> &a, Args &&... args) {
+        auto *result = ::new (static_cast<void *>(lib::addressof(a)))
+            alt<I, T>(in_place_t{}, lib::forward<Args>(args)...);
+        return result->value;
+      }
+
+      // Destroy lhs, then copy/move-construct it from rhs (unless rhs is
+      // valueless, in which case lhs simply stays valueless). The index is
+      // only updated after construction succeeds.
+      template <typename Rhs>
+      inline static void generic_construct(constructor &lhs, Rhs &&rhs) {
+        lhs.destroy();
+        if (!rhs.valueless_by_exception()) {
+          visitation::alt::visit_alt_at(
+              rhs.index(),
+#ifdef MPARK_GENERIC_LAMBDAS
+              [](auto &lhs_alt, auto &&rhs_alt) {
+                constructor::construct_alt(
+                    lhs_alt, lib::forward<decltype(rhs_alt)>(rhs_alt).value);
+              }
+#else
+              ctor{}
+#endif
+              ,
+              lhs,
+              lib::forward<Rhs>(rhs));
+          lhs.index_ = rhs.index_;
+        }
+      }
+    };
+
+    // Move-construction layer, specialized on the move-constructibility
+    // trait: defaulted when trivially movable, implemented via
+    // generic_construct when movable, deleted otherwise.
+    template <typename Traits, Trait = Traits::move_constructible_trait>
+    class move_constructor;
+
+#define MPARK_VARIANT_MOVE_CONSTRUCTOR(move_constructible_trait, definition) \
+  template <typename... Ts>                                                  \
+  class move_constructor<traits<Ts...>, move_constructible_trait>            \
+      : public constructor<traits<Ts...>> {                                  \
+    using super = constructor<traits<Ts...>>;                                \
+                                                                             \
+    public:                                                                  \
+    MPARK_INHERITING_CTOR(move_constructor, super)                           \
+    using super::operator=;                                                  \
+                                                                             \
+    move_constructor(const move_constructor &) = default;                    \
+    definition                                                               \
+    ~move_constructor() = default;                                           \
+    move_constructor &operator=(const move_constructor &) = default;         \
+    move_constructor &operator=(move_constructor &&) = default;              \
+  }
+
+    MPARK_VARIANT_MOVE_CONSTRUCTOR(
+        Trait::TriviallyAvailable,
+        move_constructor(move_constructor &&that) = default;);
+
+    // Non-trivial move: start valueless, then move-construct from `that`;
+    // noexcept iff every alternative is nothrow-move-constructible.
+    MPARK_VARIANT_MOVE_CONSTRUCTOR(
+        Trait::Available,
+        move_constructor(move_constructor &&that) noexcept(
+            lib::all<std::is_nothrow_move_constructible<Ts>::value...>::value)
+            : move_constructor(valueless_t{}) {
+          this->generic_construct(*this, lib::move(that));
+        });
+
+    MPARK_VARIANT_MOVE_CONSTRUCTOR(
+        Trait::Unavailable,
+        move_constructor(move_constructor &&) = delete;);
+
+#undef MPARK_VARIANT_MOVE_CONSTRUCTOR
+
+    // Copy-construction layer, specialized on the copy-constructibility
+    // trait; mirrors the move_constructor layer above.
+    template <typename Traits, Trait = Traits::copy_constructible_trait>
+    class copy_constructor;
+
+#define MPARK_VARIANT_COPY_CONSTRUCTOR(copy_constructible_trait, definition) \
+  template <typename... Ts>                                                  \
+  class copy_constructor<traits<Ts...>, copy_constructible_trait>            \
+      : public move_constructor<traits<Ts...>> {                             \
+    using super = move_constructor<traits<Ts...>>;                           \
+                                                                             \
+    public:                                                                  \
+    MPARK_INHERITING_CTOR(copy_constructor, super)                           \
+    using super::operator=;                                                  \
+                                                                             \
+    definition                                                               \
+    copy_constructor(copy_constructor &&) = default;                         \
+    ~copy_constructor() = default;                                           \
+    copy_constructor &operator=(const copy_constructor &) = default;         \
+    copy_constructor &operator=(copy_constructor &&) = default;              \
+  }
+
+    MPARK_VARIANT_COPY_CONSTRUCTOR(
+        Trait::TriviallyAvailable,
+        copy_constructor(const copy_constructor &that) = default;);
+
+    // Non-trivial copy: start valueless, then copy-construct from `that`.
+    MPARK_VARIANT_COPY_CONSTRUCTOR(
+        Trait::Available,
+        copy_constructor(const copy_constructor &that)
+            : copy_constructor(valueless_t{}) {
+          this->generic_construct(*this, that);
+        });
+
+    MPARK_VARIANT_COPY_CONSTRUCTOR(
+        Trait::Unavailable,
+        copy_constructor(const copy_constructor &) = delete;);
+
+#undef MPARK_VARIANT_COPY_CONSTRUCTOR
+
+    // Assignment layer: provides emplace() and the protected assign helpers
+    // used by the copy/move-assignment layers.
+    template <typename Traits>
+    class assignment : public copy_constructor<Traits> {
+      using super = copy_constructor<Traits>;
+
+      public:
+      MPARK_INHERITING_CTOR(assignment, super)
+      using super::operator=;
+
+      // Destroy the active alternative, construct the I-th one from `args`,
+      // and update the index. If construction throws, the variant is left
+      // valueless (destroy() already reset the index).
+      template <std::size_t I, typename... Args>
+      inline /* auto & */ auto emplace(Args &&... args)
+          -> decltype(this->construct_alt(access::base::get_alt<I>(*this),
+                                          lib::forward<Args>(args)...)) {
+        this->destroy();
+        auto &result = this->construct_alt(access::base::get_alt<I>(*this),
+                                           lib::forward<Args>(args)...);
+        this->index_ = I;
+        return result;
+      }
+
+      protected:
+#ifndef MPARK_GENERIC_LAMBDAS
+      // Fallback visitor for compilers without generic lambdas: forwards
+      // each alternative pair to assign_alt on the captured `self`.
+      template <typename That>
+      struct assigner {
+        template <typename ThisAlt, typename ThatAlt>
+        inline void operator()(ThisAlt &this_alt, ThatAlt &&that_alt) const {
+          self->assign_alt(this_alt, lib::forward<ThatAlt>(that_alt).value);
+        }
+        assignment *self;
+      };
+#endif
+
+      // Assign `arg` into alternative I. If I is already active, assign
+      // directly; otherwise emplace, either from `arg` itself or via a
+      // temporary T, choosing whichever preserves the stronger exception
+      // guarantee (see the bool_constant dispatch below).
+      template <std::size_t I, typename T, typename Arg>
+      inline void assign_alt(alt<I, T> &a, Arg &&arg) {
+        if (this->index() == I) {
+#ifdef _MSC_VER
+// MSVC warns about possible narrowing in the forwarded assignment (C4244).
+#pragma warning(push)
+#pragma warning(disable : 4244)
+#endif
+          a.value = lib::forward<Arg>(arg);
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+        } else {
+          struct {
+            void operator()(std::true_type) const {
+              this_->emplace<I>(lib::forward<Arg>(arg_));
+            }
+            void operator()(std::false_type) const {
+              // Build a temporary first so that a throwing conversion does
+              // not leave the variant valueless; the temporary is then
+              // moved into place.
+              this_->emplace<I>(T(lib::forward<Arg>(arg_)));
+            }
+            assignment *this_;
+            Arg &&arg_;
+          } impl{this, lib::forward<Arg>(arg)};
+          impl(lib::bool_constant<
+                   std::is_nothrow_constructible<T, Arg>::value ||
+                   !std::is_nothrow_move_constructible<T>::value>{});
+        }
+      }
+
+      // Variant-to-variant assignment: handle the valueless combinations
+      // explicitly, otherwise visit the active alternatives and delegate to
+      // assign_alt.
+      template <typename That>
+      inline void generic_assign(That &&that) {
+        if (this->valueless_by_exception() && that.valueless_by_exception()) {
+          // do nothing.
+        } else if (that.valueless_by_exception()) {
+          this->destroy();
+        } else {
+          visitation::alt::visit_alt_at(
+              that.index(),
+#ifdef MPARK_GENERIC_LAMBDAS
+              [this](auto &this_alt, auto &&that_alt) {
+                this->assign_alt(
+                    this_alt, lib::forward<decltype(that_alt)>(that_alt).value);
+              }
+#else
+              assigner<That>{this}
+#endif
+              ,
+              *this,
+              lib::forward<That>(that));
+        }
+      }
+    };
+
+    // Move-assignment layer, specialized on the move-assignability trait:
+    // defaulted when trivial, implemented via generic_assign when
+    // available, deleted otherwise.
+    template <typename Traits, Trait = Traits::move_assignable_trait>
+    class move_assignment;
+
+#define MPARK_VARIANT_MOVE_ASSIGNMENT(move_assignable_trait, definition) \
+  template <typename... Ts>                                              \
+  class move_assignment<traits<Ts...>, move_assignable_trait>            \
+      : public assignment<traits<Ts...>> {                               \
+    using super = assignment<traits<Ts...>>;                             \
+                                                                         \
+    public:                                                              \
+    MPARK_INHERITING_CTOR(move_assignment, super)                        \
+    using super::operator=;                                              \
+                                                                         \
+    move_assignment(const move_assignment &) = default;                  \
+    move_assignment(move_assignment &&) = default;                       \
+    ~move_assignment() = default;                                        \
+    move_assignment &operator=(const move_assignment &) = default;       \
+    definition                                                           \
+  }
+
+    MPARK_VARIANT_MOVE_ASSIGNMENT(
+        Trait::TriviallyAvailable,
+        move_assignment &operator=(move_assignment &&that) = default;);
+
+    // noexcept iff every alternative is nothrow move-constructible AND
+    // nothrow move-assignable.
+    MPARK_VARIANT_MOVE_ASSIGNMENT(
+        Trait::Available,
+        move_assignment &
+        operator=(move_assignment &&that) noexcept(
+            lib::all<(std::is_nothrow_move_constructible<Ts>::value &&
+                      std::is_nothrow_move_assignable<Ts>::value)...>::value) {
+          this->generic_assign(lib::move(that));
+          return *this;
+        });
+
+    MPARK_VARIANT_MOVE_ASSIGNMENT(
+        Trait::Unavailable,
+        move_assignment &operator=(move_assignment &&) = delete;);
+
+#undef MPARK_VARIANT_MOVE_ASSIGNMENT
+
+    // Copy-assignment layer, specialized on the copy-assignability trait;
+    // mirrors the move_assignment layer above.
+    template <typename Traits, Trait = Traits::copy_assignable_trait>
+    class copy_assignment;
+
+#define MPARK_VARIANT_COPY_ASSIGNMENT(copy_assignable_trait, definition) \
+  template <typename... Ts>                                              \
+  class copy_assignment<traits<Ts...>, copy_assignable_trait>            \
+      : public move_assignment<traits<Ts...>> {                          \
+    using super = move_assignment<traits<Ts...>>;                        \
+                                                                         \
+    public:                                                              \
+    MPARK_INHERITING_CTOR(copy_assignment, super)                        \
+    using super::operator=;                                              \
+                                                                         \
+    copy_assignment(const copy_assignment &) = default;                  \
+    copy_assignment(copy_assignment &&) = default;                       \
+    ~copy_assignment() = default;                                        \
+    definition                                                           \
+    copy_assignment &operator=(copy_assignment &&) = default;            \
+  }
+
+    MPARK_VARIANT_COPY_ASSIGNMENT(
+        Trait::TriviallyAvailable,
+        copy_assignment &operator=(const copy_assignment &that) = default;);
+
+    MPARK_VARIANT_COPY_ASSIGNMENT(
+        Trait::Available,
+        copy_assignment &operator=(const copy_assignment &that) {
+          this->generic_assign(that);
+          return *this;
+        });
+
+    MPARK_VARIANT_COPY_ASSIGNMENT(
+        Trait::Unavailable,
+        copy_assignment &operator=(const copy_assignment &) = delete;);
+
+#undef MPARK_VARIANT_COPY_ASSIGNMENT
+
+    // Top of the implementation stack: combines all the layers above and
+    // adds index-targeted assign() plus exception-safe swap().
+    template <typename... Ts>
+    class impl : public copy_assignment<traits<Ts...>> {
+      using super = copy_assignment<traits<Ts...>>;
+
+      public:
+      MPARK_INHERITING_CTOR(impl, super)
+      using super::operator=;
+
+      // Assign `arg` into the alternative at index I (see assign_alt for
+      // the same-index vs. different-index handling).
+      template <std::size_t I, typename Arg>
+      inline void assign(Arg &&arg) {
+        this->assign_alt(access::base::get_alt<I>(*this),
+                         lib::forward<Arg>(arg));
+      }
+
+      inline void swap(impl &that) {
+        if (this->valueless_by_exception() && that.valueless_by_exception()) {
+          // do nothing.
+        } else if (this->index() == that.index()) {
+          // Same active alternative: swap the contained values directly.
+          visitation::alt::visit_alt_at(this->index(),
+#ifdef MPARK_GENERIC_LAMBDAS
+                                        [](auto &this_alt, auto &that_alt) {
+                                          using std::swap;
+                                          swap(this_alt.value,
+                                               that_alt.value);
+                                        }
+#else
+                                        swapper{}
+#endif
+                                        ,
+                                        *this,
+                                        that);
+        } else {
+          // Different alternatives: three-way move via a temporary. Orient
+          // lhs/rhs so that the side with a potentially-throwing move is
+          // moved out first.
+          impl *lhs = this;
+          impl *rhs = lib::addressof(that);
+          if (lhs->move_nothrow() && !rhs->move_nothrow()) {
+            std::swap(lhs, rhs);
+          }
+          impl tmp(lib::move(*rhs));
+#ifdef MPARK_EXCEPTIONS
+          // EXTENSION: When the move construction of `lhs` into `rhs` throws
+          // and `tmp` is nothrow move constructible then we move `tmp` back
+          // into `rhs` and provide the strong exception safety guarantee.
+          try {
+            this->generic_construct(*rhs, lib::move(*lhs));
+          } catch (...) {
+            if (tmp.move_nothrow()) {
+              this->generic_construct(*rhs, lib::move(tmp));
+            }
+            throw;
+          }
+#else
+          this->generic_construct(*rhs, lib::move(*lhs));
+#endif
+          this->generic_construct(*lhs, lib::move(tmp));
+        }
+      }
+
+      private:
+#ifndef MPARK_GENERIC_LAMBDAS
+      // Fallback visitor for compilers without generic lambdas: ADL swap of
+      // the two contained values.
+      struct swapper {
+        template <typename ThisAlt, typename ThatAlt>
+        inline void operator()(ThisAlt &this_alt, ThatAlt &that_alt) const {
+          using std::swap;
+          swap(this_alt.value, that_alt.value);
+        }
+      };
+#endif
+
+      // True when moving the currently active alternative cannot throw
+      // (or the variant is valueless); consulted by swap() above.
+      inline constexpr bool move_nothrow() const {
+        return this->valueless_by_exception() ||
+               lib::array<bool, sizeof...(Ts)>{
+                   {std::is_nothrow_move_constructible<Ts>::value...}
+               }[this->index()];
+      }
+    };
+
+#undef MPARK_INHERITING_CTOR
+
+    // One candidate in the converting-constructor overload set: a function
+    // pointer taking T and encoding the alternative index I in its return
+    // type. The conversion operator makes overload resolution pick it.
+    template <std::size_t I, typename T>
+    struct overload_leaf {
+      using F = lib::size_constant<I> (*)(T);
+      operator F() const { return nullptr; }
+    };
+
+    // Merge one overload_leaf per alternative into a single type, so that
+    // invoking it performs ordinary overload resolution across all Ts.
+    template <typename... Ts>
+    struct overload_impl {
+      private:
+      template <typename>
+      struct impl;
+
+      template <std::size_t... Is>
+      struct impl<lib::index_sequence<Is...>> : overload_leaf<Is, Ts>... {};
+
+      public:
+      using type = impl<lib::index_sequence_for<Ts...>>;
+    };
+
+    template <typename... Ts>
+    using overload = typename overload_impl<Ts...>::type;
+
+    // Index of the alternative that overload resolution selects for an
+    // argument of type T; drives variant's converting ctor and assignment.
+    template <typename T, typename... Ts>
+    using best_match = lib::invoke_result_t<overload<Ts...>, T &&>;
+
+    // Detect in_place_index_t / in_place_type_t tags so the converting
+    // constructor can exclude them from consideration.
+    template <typename T>
+    struct is_in_place_index : std::false_type {};
+
+    template <std::size_t I>
+    struct is_in_place_index<in_place_index_t<I>> : std::true_type {};
+
+    template <typename T>
+    struct is_in_place_type : std::false_type {};
+
+    template <typename T>
+    struct is_in_place_type<in_place_type_t<T>> : std::true_type {};
+
+  }  // detail
+
+  // Public variant type (C++11-compatible std::variant work-alike). All
+  // behavior is delegated to detail::impl; the members here only enforce
+  // the constructibility/assignability constraints via SFINAE.
+  template <typename... Ts>
+  class variant {
+    static_assert(0 < sizeof...(Ts),
+                  "variant must consist of at least one alternative.");
+
+    static_assert(lib::all<!std::is_array<Ts>::value...>::value,
+                  "variant can not have an array type as an alternative.");
+
+    static_assert(lib::all<!std::is_reference<Ts>::value...>::value,
+                  "variant can not have a reference type as an alternative.");
+
+    static_assert(lib::all<!std::is_void<Ts>::value...>::value,
+                  "variant can not have a void type as an alternative.");
+
+    public:
+    // Default-construct the first alternative (enabled only when it is
+    // default-constructible).
+    template <
+        typename Front = lib::type_pack_element_t<0, Ts...>,
+        lib::enable_if_t<std::is_default_constructible<Front>::value, int> = 0>
+    inline constexpr variant() noexcept(
+        std::is_nothrow_default_constructible<Front>::value)
+        : impl_(in_place_index_t<0>{}) {}
+
+    variant(const variant &) = default;
+    variant(variant &&) = default;
+
+    // Converting constructor: picks the alternative selected by overload
+    // resolution (detail::best_match), excluding variant itself and the
+    // in_place tag types.
+    template <
+        typename Arg,
+        typename Decayed = lib::decay_t<Arg>,
+        lib::enable_if_t<!std::is_same<Decayed, variant>::value, int> = 0,
+        lib::enable_if_t<!detail::is_in_place_index<Decayed>::value, int> = 0,
+        lib::enable_if_t<!detail::is_in_place_type<Decayed>::value, int> = 0,
+        std::size_t I = detail::best_match<Arg, Ts...>::value,
+        typename T = lib::type_pack_element_t<I, Ts...>,
+        lib::enable_if_t<std::is_constructible<T, Arg>::value, int> = 0>
+    inline constexpr variant(Arg &&arg) noexcept(
+        std::is_nothrow_constructible<T, Arg>::value)
+        : impl_(in_place_index_t<I>{}, lib::forward<Arg>(arg)) {}
+
+    // In-place construction of the I-th alternative.
+    template <
+        std::size_t I,
+        typename... Args,
+        typename T = lib::type_pack_element_t<I, Ts...>,
+        lib::enable_if_t<std::is_constructible<T, Args...>::value, int> = 0>
+    inline explicit constexpr variant(
+        in_place_index_t<I>,
+        Args &&... args) noexcept(std::is_nothrow_constructible<T,
+                                                                Args...>::value)
+        : impl_(in_place_index_t<I>{}, lib::forward<Args>(args)...) {}
+
+    // In-place construction of the I-th alternative from an
+    // initializer_list plus extra arguments.
+    template <
+        std::size_t I,
+        typename Up,
+        typename... Args,
+        typename T = lib::type_pack_element_t<I, Ts...>,
+        lib::enable_if_t<std::is_constructible<T,
+                                               std::initializer_list<Up> &,
+                                               Args...>::value,
+                         int> = 0>
+    inline explicit constexpr variant(
+        in_place_index_t<I>,
+        std::initializer_list<Up> il,
+        Args &&... args) noexcept(std::
+                                      is_nothrow_constructible<
+                                          T,
+                                          std::initializer_list<Up> &,
+                                          Args...>::value)
+        : impl_(in_place_index_t<I>{}, il, lib::forward<Args>(args)...) {}
+
+    // In-place construction by alternative type T (T must occur exactly
+    // once among Ts; enforced by find_index_sfinae).
+    template <
+        typename T,
+        typename... Args,
+        std::size_t I = detail::find_index_sfinae<T, Ts...>::value,
+        lib::enable_if_t<std::is_constructible<T, Args...>::value, int> = 0>
+    inline explicit constexpr variant(
+        in_place_type_t<T>,
+        Args &&... args) noexcept(std::is_nothrow_constructible<T,
+                                                                Args...>::value)
+        : impl_(in_place_index_t<I>{}, lib::forward<Args>(args)...) {}
+
+    // In-place construction by type from an initializer_list.
+    template <
+        typename T,
+        typename Up,
+        typename... Args,
+        std::size_t I = detail::find_index_sfinae<T, Ts...>::value,
+        lib::enable_if_t<std::is_constructible<T,
+                                               std::initializer_list<Up> &,
+                                               Args...>::value,
+                         int> = 0>
+    inline explicit constexpr variant(
+        in_place_type_t<T>,
+        std::initializer_list<Up> il,
+        Args &&... args) noexcept(std::
+                                      is_nothrow_constructible<
+                                          T,
+                                          std::initializer_list<Up> &,
+                                          Args...>::value)
+        : impl_(in_place_index_t<I>{}, il, lib::forward<Args>(args)...) {}
+
+    ~variant() = default;
+
+    variant &operator=(const variant &) = default;
+    variant &operator=(variant &&) = default;
+
+    // Converting assignment: requires the matched alternative to be both
+    // assignable and constructible from Arg.
+    template <typename Arg,
+              lib::enable_if_t<!std::is_same<lib::decay_t<Arg>, variant>::value,
+                               int> = 0,
+              std::size_t I = detail::best_match<Arg, Ts...>::value,
+              typename T = lib::type_pack_element_t<I, Ts...>,
+              lib::enable_if_t<(std::is_assignable<T &, Arg>::value &&
+                                std::is_constructible<T, Arg>::value),
+                               int> = 0>
+    inline variant &operator=(Arg &&arg) noexcept(
+        (std::is_nothrow_assignable<T &, Arg>::value &&
+         std::is_nothrow_constructible<T, Arg>::value)) {
+      impl_.template assign<I>(lib::forward<Arg>(arg));
+      return *this;
+    }
+
+    // emplace by index.
+    template <
+        std::size_t I,
+        typename... Args,
+        typename T = lib::type_pack_element_t<I, Ts...>,
+        lib::enable_if_t<std::is_constructible<T, Args...>::value, int> = 0>
+    inline T &emplace(Args &&... args) {
+      return impl_.template emplace<I>(lib::forward<Args>(args)...);
+    }
+
+    // emplace by index with initializer_list.
+    template <
+        std::size_t I,
+        typename Up,
+        typename... Args,
+        typename T = lib::type_pack_element_t<I, Ts...>,
+        lib::enable_if_t<std::is_constructible<T,
+                                               std::initializer_list<Up> &,
+                                               Args...>::value,
+                         int> = 0>
+    inline T &emplace(std::initializer_list<Up> il, Args &&... args) {
+      return impl_.template emplace<I>(il, lib::forward<Args>(args)...);
+    }
+
+    // emplace by alternative type.
+    template <
+        typename T,
+        typename... Args,
+        std::size_t I = detail::find_index_sfinae<T, Ts...>::value,
+        lib::enable_if_t<std::is_constructible<T, Args...>::value, int> = 0>
+    inline T &emplace(Args &&... args) {
+      return impl_.template emplace<I>(lib::forward<Args>(args)...);
+    }
+
+    // emplace by alternative type with initializer_list.
+    template <
+        typename T,
+        typename Up,
+        typename... Args,
+        std::size_t I = detail::find_index_sfinae<T, Ts...>::value,
+        lib::enable_if_t<std::is_constructible<T,
+                                               std::initializer_list<Up> &,
+                                               Args...>::value,
+                         int> = 0>
+    inline T &emplace(std::initializer_list<Up> il, Args &&... args) {
+      return impl_.template emplace<I>(il, lib::forward<Args>(args)...);
+    }
+
+    inline constexpr bool valueless_by_exception() const noexcept {
+      return impl_.valueless_by_exception();
+    }
+
+    inline constexpr std::size_t index() const noexcept {
+      return impl_.index();
+    }
+
+    // swap, enabled only when every alternative is move-constructible and
+    // swappable (the Dummy parameter defers the check to instantiation).
+    template <bool Dummy = true,
+              lib::enable_if_t<
+                  lib::all<Dummy,
+                           (lib::dependent_type<std::is_move_constructible<Ts>,
+                                                Dummy>::value &&
+                            lib::dependent_type<lib::is_swappable<Ts>,
+                                                Dummy>::value)...>::value,
+                  int> = 0>
+    inline void swap(variant &that) noexcept(
+        lib::all<(std::is_nothrow_move_constructible<Ts>::value &&
+                  lib::is_nothrow_swappable<Ts>::value)...>::value) {
+      impl_.swap(that.impl_);
+    }
+
+    private:
+    detail::impl<Ts...> impl_;
+
+    friend struct detail::access::variant;
+    friend struct detail::visitation::variant;
+  };
+
+  // True iff the alternative at position I is the one currently held by v.
+  template <std::size_t I, typename... Ts>
+  inline constexpr bool holds_alternative(const variant<Ts...> &v) noexcept {
+    return I == v.index();
+  }
+
+  // Type-based overload: maps T to its (unique) index among Ts, then
+  // defers to the index-based overload above.
+  template <typename T, typename... Ts>
+  inline constexpr bool holds_alternative(const variant<Ts...> &v) noexcept {
+    return holds_alternative<detail::find_index_checked<T, Ts...>::value>(v);
+  }
+
+  namespace detail {
+    // Checked accessor used by all get<> overloads. The int-taking ctor and
+    // the comma-expression below are a C++11-constexpr trick: the index
+    // check (and potential throw_bad_variant_access) runs while
+    // constructing the functor, before the value is extracted.
+    template <std::size_t I, typename V>
+    struct generic_get_impl {
+      constexpr generic_get_impl(int) noexcept {}
+
+      // AUTO_REFREF / AUTO_REFREF_RETURN expand to a deduced
+      // reference-preserving return (macro defined earlier in this header).
+      constexpr AUTO_REFREF operator()(V &&v) const
+        AUTO_REFREF_RETURN(
+            access::variant::get_alt<I>(lib::forward<V>(v)).value)
+    };
+
+    template <std::size_t I, typename V>
+    inline constexpr AUTO_REFREF generic_get(V &&v)
+      AUTO_REFREF_RETURN(generic_get_impl<I, V>(
+          holds_alternative<I>(v) ? 0 : (throw_bad_variant_access(), 0))(
+          lib::forward<V>(v)))
+  }  // namespace detail
+
+  // Checked access to the I-th alternative, in the four const/value-category
+  // combinations; throws bad_variant_access (via detail::generic_get) when
+  // I is not the active alternative.
+  template <std::size_t I, typename... Ts>
+  inline constexpr variant_alternative_t<I, variant<Ts...>> &get(
+      variant<Ts...> &v) {
+    return detail::generic_get<I>(v);
+  }
+
+  template <std::size_t I, typename... Ts>
+  inline constexpr variant_alternative_t<I, variant<Ts...>> &&get(
+      variant<Ts...> &&v) {
+    return detail::generic_get<I>(lib::move(v));
+  }
+
+  template <std::size_t I, typename... Ts>
+  inline constexpr const variant_alternative_t<I, variant<Ts...>> &get(
+      const variant<Ts...> &v) {
+    return detail::generic_get<I>(v);
+  }
+
+  template <std::size_t I, typename... Ts>
+  inline constexpr const variant_alternative_t<I, variant<Ts...>> &&get(
+      const variant<Ts...> &&v) {
+    return detail::generic_get<I>(lib::move(v));
+  }
+
+  // Type-based overloads: resolve T to its unique index, then defer to the
+  // index-based overloads above.
+  template <typename T, typename... Ts>
+  inline constexpr T &get(variant<Ts...> &v) {
+    return get<detail::find_index_checked<T, Ts...>::value>(v);
+  }
+
+  template <typename T, typename... Ts>
+  inline constexpr T &&get(variant<Ts...> &&v) {
+    return get<detail::find_index_checked<T, Ts...>::value>(lib::move(v));
+  }
+
+  template <typename T, typename... Ts>
+  inline constexpr const T &get(const variant<Ts...> &v) {
+    return get<detail::find_index_checked<T, Ts...>::value>(v);
+  }
+
+  template <typename T, typename... Ts>
+  inline constexpr const T &&get(const variant<Ts...> &&v) {
+    return get<detail::find_index_checked<T, Ts...>::value>(lib::move(v));
+  }
+
+  namespace detail {
+
+    // Non-throwing accessor: pointer to the I-th alternative's value, or
+    // nullptr when `v` is null or I is not the active alternative.
+    template <std::size_t I, typename V>
+    inline constexpr /* auto * */ AUTO generic_get_if(V *v) noexcept
+      AUTO_RETURN(v && holds_alternative<I>(*v)
+                      ? lib::addressof(access::variant::get_alt<I>(*v).value)
+                      : nullptr)
+
+  }  // namespace detail
+
+  // Index-based get_if, mutable and const flavors.
+  template <std::size_t I, typename... Ts>
+  inline constexpr lib::add_pointer_t<variant_alternative_t<I, variant<Ts...>>>
+  get_if(variant<Ts...> *v) noexcept {
+    return detail::generic_get_if<I>(v);
+  }
+
+  template <std::size_t I, typename... Ts>
+  inline constexpr lib::add_pointer_t<
+      const variant_alternative_t<I, variant<Ts...>>>
+  get_if(const variant<Ts...> *v) noexcept {
+    return detail::generic_get_if<I>(v);
+  }
+
+  // Type-based get_if: resolve T to its unique index and defer to the
+  // index-based overloads above.
+  template <typename T, typename... Ts>
+  inline constexpr lib::add_pointer_t<T>
+  get_if(variant<Ts...> *v) noexcept {
+    return get_if<detail::find_index_checked<T, Ts...>::value>(v);
+  }
+
+  template <typename T, typename... Ts>
+  inline constexpr lib::add_pointer_t<const T>
+  get_if(const variant<Ts...> *v) noexcept {
+    return get_if<detail::find_index_checked<T, Ts...>::value>(v);
+  }
+
+  namespace detail {
+    // Wraps a relational functor (equal_to, less, ...) and forces its
+    // result to bool, diagnosing alternatives whose comparison operators
+    // return something not implicitly convertible to bool.
+    template <typename RelOp>
+    struct convert_to_bool {
+      template <typename Lhs, typename Rhs>
+      inline constexpr bool operator()(Lhs &&lhs, Rhs &&rhs) const {
+        static_assert(std::is_convertible<lib::invoke_result_t<RelOp, Lhs, Rhs>,
+                                          bool>::value,
+                      "relational operators must return a type"
+                      " implicitly convertible to bool");
+        return lib::invoke(
+            RelOp{}, lib::forward<Lhs>(lhs), lib::forward<Rhs>(rhs));
+      }
+    };
+  }  // namespace detail
+
+  // Equal iff both variants hold the same alternative and the held values
+  // compare equal; two valueless variants compare equal.
+  template <typename... Ts>
+  inline constexpr bool operator==(const variant<Ts...> &lhs,
+                                   const variant<Ts...> &rhs) {
+    using detail::visitation::variant;
+    using equal_to = detail::convert_to_bool<lib::equal_to>;
+#ifdef MPARK_CPP14_CONSTEXPR
+    if (lhs.index() != rhs.index()) return false;
+    if (lhs.valueless_by_exception()) return true;
+    return variant::visit_value_at(lhs.index(), equal_to{}, lhs, rhs);
+#else
+    // C++11 constexpr permits only a single return statement, so the same
+    // logic is folded into one boolean expression.
+    return lhs.index() == rhs.index() &&
+           (lhs.valueless_by_exception() ||
+            variant::visit_value_at(lhs.index(), equal_to{}, lhs, rhs));
+#endif
+  }
+
+  // Logical negation of operator==: differing indices are unequal; two
+  // valueless variants are equal; otherwise compare the held values.
+  template <typename... Ts>
+  inline constexpr bool operator!=(const variant<Ts...> &lhs,
+                                   const variant<Ts...> &rhs) {
+    using detail::visitation::variant;
+    using not_equal_to = detail::convert_to_bool<lib::not_equal_to>;
+#ifdef MPARK_CPP14_CONSTEXPR
+    if (lhs.index() != rhs.index()) return true;
+    if (lhs.valueless_by_exception()) return false;
+    return variant::visit_value_at(lhs.index(), not_equal_to{}, lhs, rhs);
+#else
+    // Single-expression form of the checks above (C++11 constexpr).
+    return lhs.index() != rhs.index() ||
+           (!lhs.valueless_by_exception() &&
+            variant::visit_value_at(lhs.index(), not_equal_to{}, lhs, rhs));
+#endif
+  }
+
+  // Ordering: a valueless variant is less than any variant holding a
+  // value; otherwise compare by index first, then by the held values.
+  template <typename... Ts>
+  inline constexpr bool operator<(const variant<Ts...> &lhs,
+                                  const variant<Ts...> &rhs) {
+    using detail::visitation::variant;
+    using less = detail::convert_to_bool<lib::less>;
+#ifdef MPARK_CPP14_CONSTEXPR
+    if (rhs.valueless_by_exception()) return false;
+    if (lhs.valueless_by_exception()) return true;
+    if (lhs.index() < rhs.index()) return true;
+    if (lhs.index() > rhs.index()) return false;
+    return variant::visit_value_at(lhs.index(), less{}, lhs, rhs);
+#else
+    // Single-expression form of the checks above (C++11 constexpr).
+    return !rhs.valueless_by_exception() &&
+           (lhs.valueless_by_exception() || lhs.index() < rhs.index() ||
+            (lhs.index() == rhs.index() &&
+             variant::visit_value_at(lhs.index(), less{}, lhs, rhs)));
+#endif
+  }
+
+  // Mirror of operator<: a valueless lhs is never greater; a valueless
+  // rhs makes any non-valueless lhs greater; then index, then value.
+  template <typename... Ts>
+  inline constexpr bool operator>(const variant<Ts...> &lhs,
+                                  const variant<Ts...> &rhs) {
+    using detail::visitation::variant;
+    using greater = detail::convert_to_bool<lib::greater>;
+#ifdef MPARK_CPP14_CONSTEXPR
+    if (lhs.valueless_by_exception()) return false;
+    if (rhs.valueless_by_exception()) return true;
+    if (lhs.index() > rhs.index()) return true;
+    if (lhs.index() < rhs.index()) return false;
+    return variant::visit_value_at(lhs.index(), greater{}, lhs, rhs);
+#else
+    // Single-expression form of the checks above (C++11 constexpr).
+    return !lhs.valueless_by_exception() &&
+           (rhs.valueless_by_exception() || lhs.index() > rhs.index() ||
+            (lhs.index() == rhs.index() &&
+             variant::visit_value_at(lhs.index(), greater{}, lhs, rhs)));
+#endif
+  }
+
+  // A valueless lhs is <= everything; a valueless rhs is only >= a
+  // valueless lhs; otherwise compare by index, then by value.
+  template <typename... Ts>
+  inline constexpr bool operator<=(const variant<Ts...> &lhs,
+                                   const variant<Ts...> &rhs) {
+    using detail::visitation::variant;
+    using less_equal = detail::convert_to_bool<lib::less_equal>;
+#ifdef MPARK_CPP14_CONSTEXPR
+    if (lhs.valueless_by_exception()) return true;
+    if (rhs.valueless_by_exception()) return false;
+    if (lhs.index() < rhs.index()) return true;
+    if (lhs.index() > rhs.index()) return false;
+    return variant::visit_value_at(lhs.index(), less_equal{}, lhs, rhs);
+#else
+    // Single-expression form of the checks above (C++11 constexpr).
+    return lhs.valueless_by_exception() ||
+           (!rhs.valueless_by_exception() &&
+            (lhs.index() < rhs.index() ||
+             (lhs.index() == rhs.index() &&
+              variant::visit_value_at(lhs.index(), less_equal{}, lhs, rhs))));
+#endif
+  }
+
+  // Mirror of operator<=: a valueless rhs makes lhs >= it; a valueless
+  // lhs is only >= a valueless rhs; otherwise index, then value.
+  template <typename... Ts>
+  inline constexpr bool operator>=(const variant<Ts...> &lhs,
+                                   const variant<Ts...> &rhs) {
+    using detail::visitation::variant;
+    using greater_equal = detail::convert_to_bool<lib::greater_equal>;
+#ifdef MPARK_CPP14_CONSTEXPR
+    if (rhs.valueless_by_exception()) return true;
+    if (lhs.valueless_by_exception()) return false;
+    if (lhs.index() > rhs.index()) return true;
+    if (lhs.index() < rhs.index()) return false;
+    return variant::visit_value_at(lhs.index(), greater_equal{}, lhs, rhs);
+#else
+    // Single-expression form of the checks above (C++11 constexpr).
+    return rhs.valueless_by_exception() ||
+           (!lhs.valueless_by_exception() &&
+            (lhs.index() > rhs.index() ||
+             (lhs.index() == rhs.index() &&
+              variant::visit_value_at(
+                  lhs.index(), greater_equal{}, lhs, rhs))));
+#endif
+  }
+
+  // Unit type intended as a first alternative so a variant can be
+  // default-constructed even when its "real" alternatives cannot.
+  struct monostate {};
+
+  // All monostate values are identical, so the relational operators
+  // encode the trivial total order on a single value.
+  inline constexpr bool operator<(monostate, monostate) noexcept {
+    return false;
+  }
+
+  inline constexpr bool operator>(monostate, monostate) noexcept {
+    return false;
+  }
+
+  inline constexpr bool operator<=(monostate, monostate) noexcept {
+    return true;
+  }
+
+  inline constexpr bool operator>=(monostate, monostate) noexcept {
+    return true;
+  }
+
+  inline constexpr bool operator==(monostate, monostate) noexcept {
+    return true;
+  }
+
+  inline constexpr bool operator!=(monostate, monostate) noexcept {
+    return false;
+  }
+
+#ifdef MPARK_CPP14_CONSTEXPR
+  namespace detail {
+
+    // True iff any element of the list is true (C++14 constexpr loop).
+    inline constexpr bool any(std::initializer_list<bool> bs) {
+      for (bool b : bs) {
+        if (b) {
+          return true;
+        }
+      }
+      return false;
+    }
+
+  }  // namespace detail
+
+  // visit: throws bad_variant_access when any argument is valueless,
+  // otherwise dispatches the visitor onto the values currently held by
+  // all of the variants.
+  template <typename Visitor, typename... Vs>
+  inline constexpr decltype(auto) visit(Visitor &&visitor, Vs &&... vs) {
+    // The comma operator sequences the valueless check before dispatch
+    // while keeping the whole body a single expression.
+    return (!detail::any({vs.valueless_by_exception()...})
+                ? (void)0
+                : throw_bad_variant_access()),
+           detail::visitation::variant::visit_value(
+               lib::forward<Visitor>(visitor), lib::forward<Vs>(vs)...);
+  }
+#else
+  namespace detail {
+
+    // C++11 constexpr recursion: true iff every element of bs is true.
+    template <std::size_t N>
+    inline constexpr bool all_impl(const lib::array<bool, N> &bs,
+                                   std::size_t idx) {
+      return idx >= N || (bs[idx] && all_impl(bs, idx + 1));
+    }
+
+    template <std::size_t N>
+    inline constexpr bool all(const lib::array<bool, N> &bs) {
+      return all_impl(bs, 0);
+    }
+
+  }  // namespace detail
+
+  // C++11 form of visit: the DECLTYPE_AUTO / DECLTYPE_AUTO_RETURN macros
+  // emulate a deduced decltype(auto) return with a single expression.
+  template <typename Visitor, typename... Vs>
+  inline constexpr DECLTYPE_AUTO visit(Visitor &&visitor, Vs &&... vs)
+    DECLTYPE_AUTO_RETURN(
+        (detail::all(
+             lib::array<bool, sizeof...(Vs)>{{!vs.valueless_by_exception()...}})
+             ? (void)0
+             : throw_bad_variant_access()),
+        detail::visitation::variant::visit_value(lib::forward<Visitor>(visitor),
+                                                 lib::forward<Vs>(vs)...))
+#endif
+
+  // Free swap over member swap; the trailing-return decltype constrains
+  // this overload to participate only when lhs.swap(rhs) is well-formed,
+  // and noexcept mirrors the member's specification.
+  template <typename... Ts>
+  inline auto swap(variant<Ts...> &lhs,
+                   variant<Ts...> &rhs) noexcept(noexcept(lhs.swap(rhs)))
+      -> decltype(lhs.swap(rhs)) {
+    lhs.swap(rhs);
+  }
+
+  namespace detail {
+
+    // Yields T unchanged; the trailing pack is a slot for SFINAE
+    // conditions (used by the std::hash<variant> specialization below).
+    template <typename T, typename...>
+    using enabled_type = T;
+
+    namespace hash {
+
+      // Hash requirements check: H must be copy/move-constructible and
+      // invocable on const K& with a result convertible to std::size_t.
+      template <typename H, typename K>
+      constexpr bool meets_requirements() noexcept {
+        return std::is_copy_constructible<H>::value &&
+               std::is_move_constructible<H>::value &&
+               lib::is_invocable_r<std::size_t, H, const K &>::value;
+      }
+
+      // True when std::hash<K> looks like an enabled specialization:
+      // default-constructible, assignable, and meeting the Hash
+      // requirements above.
+      template <typename K>
+      constexpr bool is_enabled() noexcept {
+        using H = std::hash<K>;
+        return meets_requirements<H, K>() &&
+               std::is_default_constructible<H>::value &&
+               std::is_copy_assignable<H>::value &&
+               std::is_move_assignable<H>::value;
+      }
+
+    }  // namespace hash
+
+  }  // namespace detail
+
+#undef AUTO
+#undef AUTO_RETURN
+
+#undef AUTO_REFREF
+#undef AUTO_REFREF_RETURN
+
+#undef DECLTYPE_AUTO
+#undef DECLTYPE_AUTO_RETURN
+
+}  // namespace mpark
+
+namespace std {
+
+  // std::hash specialization for mpark::variant, SFINAE-enabled (via
+  // enabled_type) only when std::hash is enabled for every
+  // const-stripped alternative type.
+  template <typename... Ts>
+  struct hash<mpark::detail::enabled_type<
+      mpark::variant<Ts...>,
+      mpark::lib::enable_if_t<mpark::lib::all<mpark::detail::hash::is_enabled<
+          mpark::lib::remove_const_t<Ts>>()...>::value>>> {
+    using argument_type = mpark::variant<Ts...>;
+    using result_type = std::size_t;
+
+    // Hashes the held value (or a fixed sentinel for a valueless
+    // variant), then mixes in the index so the same value stored in
+    // different alternatives hashes differently.
+    inline result_type operator()(const argument_type &v) const {
+      using mpark::detail::visitation::variant;
+      std::size_t result =
+          v.valueless_by_exception()
+              ? 299792458  // Random value chosen by the universe upon creation
+              : variant::visit_alt(
+#ifdef MPARK_GENERIC_LAMBDAS
+                    [](const auto &alt) {
+                      using alt_type = mpark::lib::decay_t<decltype(alt)>;
+                      using value_type = mpark::lib::remove_const_t<
+                          typename alt_type::value_type>;
+                      return hash<value_type>{}(alt.value);
+                    }
+#else
+                    hasher{}
+#endif
+                    ,
+                    v);
+      return hash_combine(result, hash<std::size_t>{}(v.index()));
+    }
+
+    private:
+#ifndef MPARK_GENERIC_LAMBDAS
+    // C++11 fallback for the generic lambda above: hashes the value
+    // stored in the visited alternative.
+    struct hasher {
+      template <typename Alt>
+      inline std::size_t operator()(const Alt &alt) const {
+        using alt_type = mpark::lib::decay_t<Alt>;
+        using value_type =
+            mpark::lib::remove_const_t<typename alt_type::value_type>;
+        return hash<value_type>{}(alt.value);
+      }
+    };
+#endif
+
+    // Boost-style hash combiner (golden-ratio constant 0x9e3779b9).
+    static std::size_t hash_combine(std::size_t lhs, std::size_t rhs) {
+      return lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2);
+    }
+  };
+
+  // Hash for monostate: every monostate is equal, so any constant works.
+  template <>
+  struct hash<mpark::monostate> {
+    using argument_type = mpark::monostate;
+    using result_type = std::size_t;
+
+    inline result_type operator()(const argument_type &) const noexcept {
+      return 66740831;  // return a fundamentally attractive random value.
+    }
+  };
+
+}  // namespace std
+
+#endif  // MPARK_VARIANT_HPP

+ 195 - 0
3rd/numpy/include/xtl/xvisitor.hpp

@@ -0,0 +1,195 @@
+/***************************************************************************
+* Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht          *
+* Copyright (c) QuantStack                                                 *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_VISITOR_HPP
+#define XTL_VISITOR_HPP
+
+#include <stdexcept>
+#include "xmeta_utils.hpp"
+
+namespace xtl
+{
+    // Loki's visitor ported to C++14
+    // Original implementation can be found at:
+    // https://github.com/snaewe/loki-lib/blob/master/include/loki/Visitor.h
+
+    /****************
+     * base_visitor *
+     ****************/
+
+    // Common polymorphic root for all visitors; base_visitable's
+    // accept_impl recovers the concrete visitor type via dynamic_cast.
+    class base_visitor
+    {
+    public:
+
+        virtual ~base_visitor() = default;
+    };
+
+    /***********
+     * visitor *
+     ***********/
+
+    // Visitor interface for a single type T: one pure virtual visit
+    // overload taking T& (const T& when is_const) and returning R.
+    template <class T, class R = void, bool is_const = true>
+    class visitor
+    {
+    public:
+
+        using return_type = R;
+        using param_type = std::conditional_t<is_const, const T, T>;
+
+        virtual ~visitor() = default;
+
+        virtual return_type visit(param_type&) = 0;
+    };
+
+    // Recursion terminator for the type-list form below.
+    template <class R, bool is_const>
+    class visitor<mpl::vector<>, R, is_const>
+    {
+    };
+
+    // Type-list form: inherits one visit overload per type listed in
+    // mpl::vector<T, U...>.
+    template <class R, bool is_const, class T, class... U>
+    class visitor<mpl::vector<T, U...>, R, is_const>
+        : public visitor<T, R, is_const>
+        , public visitor<mpl::vector<U...>, R, is_const>
+    {
+    };
+
+    /**********************
+     * catch_all policies *
+     **********************/
+
+    // Fallback policy used when the visitor passed to accept does not
+    // handle the visited type: silently returns a default-constructed R.
+    template <class R, class T>
+    struct default_catch_all
+    {
+        static R on_unknown_visitor(T&, base_visitor&)
+        {
+            return R();
+        }
+    };
+
+    // Alternative policy that raises std::runtime_error instead.
+    template <class R, class T>
+    struct throwing_catch_all
+    {
+        static R on_unknown_visitor(T&, base_visitor&)
+        {
+            XTL_THROW(std::runtime_error, "Unknown visited type");
+        }
+    };
+
+    /******************
+     * base_visitable *
+     ******************/
+
+    // Base for visitable classes. R is the visitation return type,
+    // const_visitable selects whether accept is a const member, and
+    // catch_all is the policy applied on an unhandled visitor/type pair.
+    template
+    <
+        class R = void,
+        bool const_visitable = false,
+        template <class, class> class catch_all = default_catch_all
+    >
+    class base_visitable;
+
+    // Non-const visitation: accept mutates the visited object.
+    template <class R, template <class, class> class catch_all>
+    class base_visitable<R, false, catch_all>
+    {
+    public:
+
+        using return_type = R;
+
+        virtual ~base_visitable() = default;
+        virtual return_type accept(base_visitor&) = 0;
+
+    protected:
+
+        // Double-dispatch helper: downcasts the visitor to the
+        // visitor<T, R, false> interface for the concrete visited type;
+        // falls back to the catch_all policy if the cast fails.
+        template <class T>
+        static return_type accept_impl(T& visited, base_visitor& vis)
+        {
+            if (auto* p = dynamic_cast<visitor<T, R, false>*>(&vis))
+            {
+                return p->visit(visited);
+            }
+            return catch_all<R, T>::on_unknown_visitor(visited, vis);
+        }
+    };
+
+    // Const visitation: accept is const and the visitor sees const T&.
+    template <class R, template <class, class> class catch_all>
+    class base_visitable<R, true, catch_all>
+    {
+    public:
+
+        using return_type = R;
+
+        virtual ~base_visitable() = default;
+        virtual return_type accept(base_visitor&) const = 0;
+
+    protected:
+
+        // Same double-dispatch as above, targeting the const visitor
+        // interface (visitor<T, R, true>).
+        template <class T>
+        static return_type accept_impl(const T& visited, base_visitor& vis)
+        {
+            if (auto* p = dynamic_cast<visitor<T, R, true>*>(&vis))
+            {
+                return p->visit(visited);
+            }
+            return catch_all<R, const T>::on_unknown_visitor(visited, vis);
+        }
+    };
+
+    /************************
+     * XTL_DEFINE_VISITABLE *
+     ************************/
+
+// Injects a non-const accept override that dispatches through
+// accept_impl with the dynamic type of *this.
+#define XTL_DEFINE_VISITABLE() \
+    return_type accept(::xtl::base_visitor& vis) override \
+    { return accept_impl(*this, vis); }
+
+// Const counterpart of XTL_DEFINE_VISITABLE.
+#define XTL_DEFINE_CONST_VISITABLE() \
+    return_type accept(::xtl::base_visitor& vis) const override \
+    { return accept_impl(*this, vis); }
+
+    /******************
+     * cyclic_visitor *
+     ******************/
+
+    // Visitor over a fixed, closed set of types T...; used together with
+    // the cyclic-visitable macros below.
+    template <class T, class R, bool is_const = true>
+    class cyclic_visitor;
+
+    template <class R, bool is_const, class... T>
+    class cyclic_visitor<mpl::vector<T...>, R, is_const>
+        : public visitor<mpl::vector<T...>, R, is_const>
+    {
+    public:
+
+        using return_type = R;
+
+        // Selects the visitor<V> base matching the visited type and
+        // forwards to its visit overload.
+        template <class V>
+        return_type generic_visit(V& visited)
+        {
+            visitor<std::remove_const_t<V>, return_type, is_const>& sub_obj = *this;
+            return sub_obj.visit(visited);
+        }
+    };
+
+    /*******************************
+     * XTL_DEFINE_CYCLIC_VISITABLE *
+     *******************************/
+
+// Injects an accept overload for a specific cyclic visitor type; the
+// visitor's generic_visit resolves the matching visit overload.
+#define XTL_DEFINE_CYCLIC_VISITABLE(some_visitor)                     \
+    virtual some_visitor::return_type accept(some_visitor& vis)       \
+    {                                                                 \
+        return vis.generic_visit(*this);                              \
+    }
+
+// Const counterpart of XTL_DEFINE_CYCLIC_VISITABLE.
+#define XTL_DEFINE_CONST_CYCLIC_VISITABLE(some_visitor)               \
+    virtual some_visitor::return_type accept(some_visitor& vis) const \
+    {                                                                 \
+        return vis.generic_visit(*this);                              \
+    }
+}
+
+#endif
+

Alguns arquivos não foram mostrados porque muitos arquivos mudaram nesse diff