str.h 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. #pragma once
  2. #include "common.h"
  3. #include "memory.h"
  4. #include "vector.h"
  5. namespace pkpy {
  // Declared here, defined elsewhere.
  // NOTE(review): presumably returns the byte length of the UTF-8 sequence that
  // starts with lead byte `c`, with `suppress` controlling how an invalid lead
  // byte is reported -- confirm against the definition.
  int utf8len(unsigned char c, bool suppress = false);

  // Forward declaration: Str::escape_ takes an SStream& before SStream is defined.
  struct SStream;
  8. struct Str{
  9. int size;
  10. bool is_ascii;
  11. char* data;
  12. char _inlined[16];
  13. bool is_inlined() const { return data == _inlined; }
  14. Str();
  15. Str(int size, bool is_ascii);
  16. Str(const std::string& s);
  17. Str(std::string_view s);
  18. Str(const char* s);
  19. Str(const char* s, int len);
  20. Str(std::pair<char *, int>);
  21. Str(const Str& other);
  22. Str(Str&& other);
  23. operator std::string_view() const { return sv(); }
  24. const char* begin() const { return data; }
  25. const char* end() const { return data + size; }
  26. char operator[](int idx) const { return data[idx]; }
  27. int length() const { return size; }
  28. bool empty() const { return size == 0; }
  29. size_t hash() const{ return std::hash<std::string_view>()(sv()); }
  30. Str& operator=(const Str&);
  31. Str operator+(const Str&) const;
  32. Str operator+(const char*) const;
  33. friend Str operator+(const char*, const Str&);
  34. bool operator==(const std::string_view other) const;
  35. bool operator!=(const std::string_view other) const;
  36. bool operator<(const std::string_view other) const;
  37. friend bool operator<(const std::string_view other, const Str& str);
  38. bool operator==(const char* p) const;
  39. bool operator!=(const char* p) const;
  40. bool operator==(const Str& other) const;
  41. bool operator!=(const Str& other) const;
  42. bool operator<(const Str& other) const;
  43. bool operator>(const Str& other) const;
  44. bool operator<=(const Str& other) const;
  45. bool operator>=(const Str& other) const;
  46. ~Str();
  47. friend std::ostream& operator<<(std::ostream& os, const Str& str);
  48. const char* c_str() const { return data; }
  49. std::string_view sv() const { return std::string_view(data, size); }
  50. std::string str() const { return std::string(data, size); }
  51. Str substr(int start, int len) const;
  52. Str substr(int start) const;
  53. Str strip(bool left, bool right, const Str& chars) const;
  54. Str strip(bool left=true, bool right=true) const;
  55. Str lstrip() const { return strip(true, false); }
  56. Str rstrip() const { return strip(false, true); }
  57. Str lower() const;
  58. Str upper() const;
  59. Str escape(bool single_quote=true) const;
  60. void escape_(SStream& ss, bool single_quote=true) const;
  61. int index(const Str& sub, int start=0) const;
  62. Str replace(char old, char new_) const;
  63. Str replace(const Str& old, const Str& new_, int count=-1) const;
  64. pod_vector<std::string_view> split(const Str& sep) const;
  65. pod_vector<std::string_view> split(char sep) const;
  66. int count(const Str& sub) const;
  67. /*************unicode*************/
  68. int _unicode_index_to_byte(int i) const;
  69. int _byte_index_to_unicode(int n) const;
  70. Str u8_getitem(int i) const;
  71. Str u8_slice(int start, int stop, int step) const;
  72. int u8_length() const;
  73. };
  74. struct StrName {
  75. uint16_t index;
  76. StrName(): index(0) {}
  77. explicit StrName(uint16_t index): index(index) {}
  78. StrName(const char* s): index(get(s).index) {}
  79. StrName(const Str& s): index(get(s.sv()).index) {}
  80. std::string_view sv() const { return _r_interned()[index];}
  81. const char* c_str() const { return _r_interned()[index].c_str(); }
  82. bool empty() const { return index == 0; }
  83. Str escape() const { return Str(sv()).escape(); }
  84. bool operator==(const StrName& other) const noexcept {
  85. return this->index == other.index;
  86. }
  87. bool operator!=(const StrName& other) const noexcept {
  88. return this->index != other.index;
  89. }
  90. bool operator<(const StrName& other) const noexcept {
  91. return sv() < other.sv();
  92. }
  93. bool operator>(const StrName& other) const noexcept {
  94. return sv() > other.sv();
  95. }
  96. static bool is_valid(int index);
  97. static StrName get(std::string_view s);
  98. static std::map<std::string, uint16_t, std::less<>>& _interned();
  99. static std::map<uint16_t, std::string>& _r_interned();
  100. static uint32_t _pesudo_random_index;
  101. };
  102. struct SStream{
  103. PK_ALWAYS_PASS_BY_POINTER(SStream)
  104. // pod_vector<T> is allocated by pool64 so the buffer can be moved into Str without a copy
  105. pod_vector<char> buffer;
  106. int _precision = -1;
  107. bool empty() const { return buffer.empty(); }
  108. void setprecision(int precision) { _precision = precision; }
  109. SStream(){}
  110. SStream(int guess_size){ buffer.reserve(guess_size); }
  111. Str str();
  112. SStream& operator<<(const Str&);
  113. SStream& operator<<(const char*);
  114. SStream& operator<<(int);
  115. SStream& operator<<(size_t);
  116. SStream& operator<<(i64);
  117. SStream& operator<<(f64);
  118. SStream& operator<<(const std::string&);
  119. SStream& operator<<(std::string_view);
  120. SStream& operator<<(char);
  121. SStream& operator<<(StrName);
  122. void write_hex(unsigned char, bool non_zero=false);
  123. void write_hex(void*);
  124. void write_hex(i64);
  125. };
  126. #ifdef _S
  127. #undef _S
  128. #endif
  129. template<typename... Args>
  130. Str _S(Args&&... args) {
  131. SStream ss;
  132. (ss << ... << args);
  133. return ss.str();
  134. }
  /// Thin non-owning wrapper around a `const char*`.
  /// Converts implicitly in both directions; the pointer is stored as given
  /// and never copied or freed here.
  struct CString {
      const char* ptr;  // wrapped pointer, exactly as passed to the constructor

      /// Wraps `ptr` without copying the characters it points to.
      CString(const char* ptr) : ptr{ptr} {}

      /// Yields the wrapped pointer.
      operator const char*() const { return this->ptr; }
  };
  140. // unary operators
  141. const StrName __repr__ = StrName::get("__repr__");
  142. const StrName __str__ = StrName::get("__str__");
  143. const StrName __hash__ = StrName::get("__hash__"); // unused
  144. const StrName __len__ = StrName::get("__len__");
  145. const StrName __iter__ = StrName::get("__iter__");
  146. const StrName __next__ = StrName::get("__next__"); // unused
  147. const StrName __neg__ = StrName::get("__neg__"); // unused
  148. const StrName __bool__ = StrName::get("__bool__"); // unused
  149. // logical operators
  150. const StrName __eq__ = StrName::get("__eq__");
  151. const StrName __lt__ = StrName::get("__lt__");
  152. const StrName __le__ = StrName::get("__le__");
  153. const StrName __gt__ = StrName::get("__gt__");
  154. const StrName __ge__ = StrName::get("__ge__");
  155. const StrName __contains__ = StrName::get("__contains__");
  156. // binary operators
  157. const StrName __add__ = StrName::get("__add__");
  158. const StrName __radd__ = StrName::get("__radd__");
  159. const StrName __sub__ = StrName::get("__sub__");
  160. const StrName __rsub__ = StrName::get("__rsub__");
  161. const StrName __mul__ = StrName::get("__mul__");
  162. const StrName __rmul__ = StrName::get("__rmul__");
  163. const StrName __truediv__ = StrName::get("__truediv__");
  164. const StrName __floordiv__ = StrName::get("__floordiv__");
  165. const StrName __mod__ = StrName::get("__mod__");
  166. const StrName __pow__ = StrName::get("__pow__");
  167. const StrName __matmul__ = StrName::get("__matmul__");
  168. const StrName __lshift__ = StrName::get("__lshift__");
  169. const StrName __rshift__ = StrName::get("__rshift__");
  170. const StrName __and__ = StrName::get("__and__");
  171. const StrName __or__ = StrName::get("__or__");
  172. const StrName __xor__ = StrName::get("__xor__");
  173. const StrName __invert__ = StrName::get("__invert__");
  174. // indexer
  175. const StrName __getitem__ = StrName::get("__getitem__");
  176. const StrName __setitem__ = StrName::get("__setitem__");
  177. const StrName __delitem__ = StrName::get("__delitem__");
  178. // specials
  179. const StrName __new__ = StrName::get("__new__");
  180. const StrName __init__ = StrName::get("__init__");
  181. const StrName __call__ = StrName::get("__call__");
  182. const StrName __divmod__ = StrName::get("__divmod__");
  183. const StrName __enter__ = StrName::get("__enter__");
  184. const StrName __exit__ = StrName::get("__exit__");
  185. const StrName __name__ = StrName::get("__name__");
  186. const StrName __all__ = StrName::get("__all__");
  187. const StrName __package__ = StrName::get("__package__");
  188. const StrName __path__ = StrName::get("__path__");
  189. const StrName __class__ = StrName::get("__class__");
  190. const StrName pk_id_add = StrName::get("add");
  191. const StrName pk_id_set = StrName::get("set");
  192. const StrName pk_id_long = StrName::get("long");
  193. const StrName pk_id_complex = StrName::get("complex");
  // Declares a static const StrName called `name`, constructed from the
  // stringified identifier (so DEF_SNAME(foo) builds it from "foo").
  // No trailing semicolon is included; the use site supplies it.
  #define DEF_SNAME(name) static const StrName name(#name)
  195. } // namespace pkpy