Euphoria
stringutils.cc
Go to the documentation of this file.
#include "base/stringutils.h"

#include <algorithm>
#include <cstring>
#include <iterator>
#include <limits>
#include <type_traits>

#include "assert/assert.h"

#include "base/stringbuilder.h"
11 
12 
13 namespace eu
14 {
15 
// Splits str in two at sep.
// Returns (text before sep, text from sep to the end with sep included).
// When sep is not part of str the result is (str, "").
// NOTE(review): despite the name, the split happens at the FIRST occurrence
// of sep (std::string::find) - confirm that this is intended.
std::pair<std::string, std::string>
get_last_string(const std::string& str, char sep)
{
    const auto sep_index = str.find(sep);
    if(sep_index != std::string::npos)
    {
        // substr without a length runs to the end of the string
        return std::make_pair(str.substr(0, sep_index), str.substr(sep_index));
    }

    return std::make_pair(str, "");
}
29 
30 
// Returns the first count characters of str, or all of str when it is shorter.
std::string
get_first_chars(const std::string& str, std::size_t count)
{
    // substr clamps the requested length to what is actually available
    return str.substr(0, count);
}
37 
38 
// Returns str unchanged when it has at most count characters,
// otherwise the first count characters followed by "...".
std::string
get_first_chars_with_ellipsis(const std::string& str, unsigned int count)
{
    const bool fits = str.length() <= count;
    if(fits)
    {
        return str;
    }

    return str.substr(0, count) + "...";
}
49 
50 
// Returns the part of str that comes before the first occurrence of sep.
// When sep is not part of str an empty string is returned.
std::string
strip_last_string(const std::string& str, char sep)
{
    const auto sep_index = str.find(sep);
    return sep_index == std::string::npos
        ? std::string{}
        : str.substr(0, sep_index)
        ;
}
62 
63 
// Returns a copy of string_to_trim without the trailing run of characters
// that are listed in trim_characters.
std::string
trim_right(const std::string& string_to_trim, std::string_view trim_characters)
{
    const auto last_kept = string_to_trim.find_last_not_of(trim_characters);
    if(last_kept == std::string::npos)
    {
        // nothing but trim characters (or an empty string): nothing is kept
        return {};
    }

    return string_to_trim.substr(0, last_kept + 1);
}
69 
70 
// Returns a copy of string_to_trim without the leading run of characters
// that are listed in trim_characters.
std::string
trim_left(const std::string& string_to_trim, std::string_view trim_characters)
{
    const auto first_kept = string_to_trim.find_first_not_of(trim_characters);
    if(first_kept == std::string::npos)
    {
        // nothing but trim characters (or an empty string): nothing is kept
        return {};
    }

    return string_to_trim.substr(first_kept);
}
76 
77 
78 std::string
79 trim(const std::string& string_to_trim, std::string_view trim_characters)
80 {
81  return trim_right(trim_left(string_to_trim, trim_characters), trim_characters);
82 }
83 
84 
// Tests if string_to_test starts with start.
// An empty start matches every string.
bool
begins_with(const std::string& string_to_test, const std::string& start)
{
    if(start.length() > string_to_test.length())
    {
        // the prefix can't be longer than the string itself
        return false;
    }

    return string_to_test.compare(0, start.length(), start) == 0;
}
98 
99 
// Tests if string_to_test ends with end.
// An empty end matches every string.
bool
ends_with(const std::string& string_to_test, const std::string& end)
{
    const auto end_length = end.length();
    const auto total_length = string_to_test.length();
    if(end_length > total_length)
    {
        // the suffix can't be longer than the string itself
        return false;
    }

    return string_to_test.compare(total_length - end_length, end_length, end) == 0;
}
113 
114 
// Converts an ASCII upper case letter ('A' to 'Z') to its lower case letter.
// Every other character is returned unchanged; no locale is consulted.
char
to_lower_char(char b)
{
    const bool is_upper = b >= 'A' && b <= 'Z';
    if(!is_upper)
    {
        return b;
    }

    // the letters are laid out in the same order in both cases,
    // so a fixed offset moves between them
    return static_cast<char>(b + ('a' - 'A'));
}
127 
// Converts an ASCII lower case letter ('a' to 'z') to its upper case letter.
// Every other character is returned unchanged; no locale is consulted.
char
to_upper_char(char b)
{
    const bool is_lower = b >= 'a' && b <= 'z';
    if(!is_lower)
    {
        return b;
    }

    // the letters are laid out in the same order in both cases,
    // so a fixed offset moves between them
    return static_cast<char>(b + ('A' - 'a'));
}
140 
141 
142 std::string
143 to_lower(const std::string& str)
144 {
145  std::string result = str;
146  std::transform(result.begin(), result.end(), result.begin(), to_lower_char);
147  return result;
148 }
149 
150 
151 std::vector<std::string>
152 to_lower(const std::vector<std::string>& str)
153 {
154  return to_string_vector(str, [](const std::string& s) { return to_lower(s); });
155 }
156 
157 
158 std::string
159 to_upper(const std::string& str)
160 {
161  std::string result = str;
162  std::transform(result.begin(), result.end(), result.begin(), to_upper_char);
163  return result;
164 }
165 
166 
167 std::string
169 {
170  const auto name = [&]() -> std::string
171  {
172  switch (c)
173  {
174  case 0: return "<null>";
175  case '\n': return "<\\n>";
176  case '\r': return "<\\r>";
177  case '\t': return "<tab>";
178  // source: http://www.asciitable.com/
179  case 1: return "<start of heading>";
180  case 2: return "<start of text>";
181  case 3: return "<end of text>";
182  case 4: return "<end of transmission>";
183  case 5: return "<enquiry>";
184  case 6: return "<acknowledge>";
185  case 7: return "<bell>";
186  case 8: return "<backspace>";
187  // case 9: return "<horizontal tab>";
188  // case 10: return "<newline>";
189  case 11: return "<vertical tab>";
190  case 12: return "<new page>";
191  // case 13: return "<carriage return>";
192  case 14: return "<shift out>";
193  case 15: return "<shift in>";
194  case 16: return "<data link esqape>";
195  case 17: return "<device control 1>";
196  case 18: return "<device control 2>";
197  case 19: return "<device control 3>";
198  case 20: return "<device control 4>";
199  case 21: return "<negative acknowledge>";
200  case 22: return "<synchronous idle>";
201  case 23: return "<end of trans. block>";
202  case 24: return "<cancel>";
203  case 25: return "<end of medium>";
204  case 26: return "<substitute>";
205  case 27: return "<escape>";
206  case 28: return "<file separator>";
207  case 29: return "<group separator>";
208  case 30: return "<record separator>";
209  case 31: return "<unit separator>";
210  case 127: return "<DEL>";
211  case ' ': return "<space>";
212  default: return fmt::to_string(c);
213  }
214  }();
215 
216  if (c == 0 && style == CharToStringStyle::smart)
217  {
218  return name;
219  }
220 
221  constexpr std::string_view smart_characters =
222  "abcdefghijklmnopqrstuwxyz"
223  "ABCDEFGHIJKLMNOPQRSTUWXYZ"
224  " "
225  "~!@#$%^&*()_+"
226  "`123456790-="
227  ",.<>/?"
228  "{}[]:;\"'\\|"
229  "\n\r\t"
230  ;
231 
232  if(style == CharToStringStyle::include_hex || smart_characters.find(c) == std::string_view::npos)
233  {
234  return fmt::format("{}({:#x})", name, c);
235  }
236  else
237  {
238  return name;
239  }
240 }
241 
242 
// Returns the index of the first character where lhs and rhs differ.
// When one string is a strict prefix of the other, the length of the shorter
// string is returned. Equal strings give std::string::npos.
std::string::size_type
find_first_index_of_mismatch(const std::string& lhs, const std::string& rhs)
{
    // the four iterator overload stops as soon as either string runs out
    const auto [left, right] = std::mismatch(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());

    const bool both_exhausted = left == lhs.end() && right == rhs.end();
    if(both_exhausted)
    {
        return std::string::npos;
    }

    return static_cast<std::string::size_type>(std::distance(lhs.begin(), left));
}
266 
267 
268 void
269 replace_all(std::string* string, const std::string& to_find, const std::string& to_replace)
270 {
271  std::size_t index = string->find(to_find);
272  const std::size_t find_length = to_find.length();
273  ASSERT(find_length > 0);
274  while(index != std::string::npos)
275  {
276  string->erase(index, find_length);
277  string->insert(index, to_replace);
278  index = string->find(to_find, index);
279  }
280 }
281 
282 
283 std::string
284 replace_all(const std::string& string, const std::string& to_find, const std::string& to_replace)
285 {
286  std::string temp = string;
287  replace_all(&temp, to_find, to_replace);
288  return temp;
289 }
290 
291 
// Copies src into the character buffer dst and always null terminates it.
// count is the number of characters that dst has room for, terminator
// included, so at most count - 1 characters of src are copied.
// Nothing is written when dst is null or when count is 0.
void
copy(char* dst, const std::string& src, const std::string::size_type& count)
{
    if(dst == nullptr || count == 0)
    {
        // without this guard count - 1 wraps around to the largest size_type
        return;
    }

    const std::string::size_type max_chars = count - 1;
    // strncpy fills the rest of the range with zeros when src is shorter
    std::strncpy(dst, src.c_str(), max_chars);
    dst[max_chars] = 0;
}
298 
299 
// Returns a copy of string where every character that is listed in to_find
// has been replaced by to_replace.
std::string
replace_with_character(const std::string& string, const std::string& to_find, char to_replace)
{
    std::string result = string;
    for(char& current: result)
    {
        const bool is_listed = to_find.find(current) != std::string::npos;
        if(is_listed)
        {
            current = to_replace;
        }
    }
    return result;
}
310 
311 
312 std::string
313 remove_from_end(const std::string& str, const std::string& end)
314 {
315  if(ends_with(str, end))
316  {
317  const auto new_length = str.length() - end.length();
318  if(new_length == 0)
319  {
320  return "";
321  }
322  ASSERT(new_length > 0);
323  return str.substr(0, new_length);
324  }
325 
326  return str;
327 }
328 
329 
330 std::string
331 strip(const std::string& str, const std::string& ch)
332 {
333  auto ss = StringBuilder{};
334  for(const char c: str)
335  {
336  if(ch.find(c) == std::string::npos)
337  {
338  ss.add_char(c);
339  }
340  }
341  return ss.to_string();
342 }
343 
344 
345 // remove all characters in ch except the first one in a chain from str
346 std::string
347 remove_consecutive(const std::string& str, const std::string& ch)
348 {
349  auto ss = StringBuilder{};
350  bool skip = false;
351  for(const char c: str)
352  {
353  if(ch.find(c) == std::string::npos)
354  {
355  ss.add_char(c);
356  skip = false;
357  }
358  else
359  {
360  if(!skip)
361  {
362  ss.add_char(c);
363  skip = true;
364  }
365  }
366  }
367  return ss.to_string();
368 }
369 
370 
371 namespace
372 {
373  enum class AddEmpty
374  {
375  no, yes
376  };
377 
378  enum class AddEmptyLast
379  {
380  no, yes
381  };
382 
383  template
384  <
385  typename IsDelimFunction
386  >
387  std::vector<std::string>
388  split_base(const std::string& str, AddEmpty add_empty, AddEmptyLast add_empty_last, IsDelimFunction&& is_delim)
389  {
390  std::vector<std::string> ret;
391  if (str.empty()) { return ret; }
392 
393  auto buffer = StringBuilder{};
394 
395  for(char c: str)
396  {
397  if (is_delim(c))
398  {
399  if (buffer.has_content())
400  {
401  ret.emplace_back(buffer.to_string());
402  buffer.clear();
403  }
404  else if (add_empty == AddEmpty::yes)
405  {
406  ret.emplace_back("");
407  }
408  }
409  else
410  {
411  buffer.add_char(c);
412  }
413  }
414 
415  if (buffer.has_content())
416  {
417  ret.emplace_back(buffer.to_string());
418  buffer.clear();
419  }
420  else if(add_empty_last == AddEmptyLast::yes)
421  {
422  ret.emplace_back("");
423  }
424 
425  return ret;
426  }
427 }
428 
429 
430 std::vector<std::string>
431 split(const std::string& s, char delim)
432 {
433  return split_base
434  (
435  s, AddEmpty::yes, AddEmptyLast::yes, [&](char c)
436  {
437  return c == delim;
438  }
439  );
440 }
441 
442 
443 std::vector<std::string>
444 split_on_spaces(const std::string& string)
445 {
446  return split_base
447  (
448  string, AddEmpty::no, AddEmptyLast::no, [](char c)
449  {
450  return space_characters.find(c) != std::string::npos;
451  }
452  );
453 }
454 
455 
// Returns str when b is true and an empty string when b is false.
std::string
get_string_or_empty(bool b, const std::string& str)
{
    return b ? str : std::string{};
}
468 
469 
// Tests if b is one of the ASCII digits '0' to '9'.
bool
is_number(char b)
{
    const bool below_range = b < '0';
    const bool above_range = b > '9';
    return !(below_range || above_range);
}
475 
476 
477 int
478 parse_number(const char** aa)
479 {
480  const char*& a = *aa;
481 
482  int result = *a - '0';
483  ++a;
484 
485  while(is_number(*a))
486  {
487  result *= 10;
488  result += *a - '0';
489  ++a;
490  }
491 
492  --a;
493  return result;
494 }
495 
496 
497 int
498 compare_string(const std::string& lhs, const std::string& rhs)
499 {
500  const char* a = lhs.c_str();
501  const char* b = rhs.c_str();
502 
503  if(a == b) { return 0; }
504  if(a == nullptr) { return -1; }
505  if(b == nullptr) { return 1; }
506 
507  while(*a != 0 && *b != 0)
508  {
509  // will contain either a number or a letter
510  const int a0 = is_number(*a) ? parse_number(&a) + 256 : to_lower_char(*a);
511  const int b0 = is_number(*b) ? parse_number(&b) + 256 : to_lower_char(*b);
512 
513  if(a0 < b0) { return -1; }
514  if(a0 > b0) { return 1; }
515 
516  ++a;
517  ++b;
518  }
519 
520  if(*a != 0) { return 1; }
521  if(*b != 0) { return -1; }
522 
523  return 0;
524 }
525 
526 }
#define ASSERT(x)
Definition: assert.h:29
CharToStringStyle
Definition: stringutils.h:113
std::string get_first_chars(const std::string &str, std::size_t count)
Definition: stringutils.cc:32
std::string remove_from_end(const std::string &str, const std::string &end)
Definition: stringutils.cc:313
std::string trim_left(const std::string &string_to_trim, std::string_view trim_characters)
Remove characters from the left, stops at the first invalid character.
Definition: stringutils.cc:72
std::string remove_consecutive(const std::string &str, const std::string &ch)
Definition: stringutils.cc:347
std::string to_lower(const std::string &str)
Generate a string containing only lower characters.
Definition: stringutils.cc:143
void copy(char *dst, const std::string &src, const std::string::size_type &count)
Copy a string to a character buffer, adding null terminator at the end.
Definition: stringutils.cc:293
std::vector< std::string > split_on_spaces(const std::string &string)
Definition: stringutils.cc:444
std::string get_first_chars_with_ellipsis(const std::string &str, unsigned int count)
Definition: stringutils.cc:40
constexpr std::string_view space_characters
Space characters.
Definition: stringutils.h:24
std::string from_char_to_string(char c, CharToStringStyle style)
Definition: stringutils.cc:168
std::vector< std::string > to_string_vector(const std::map< TKey, TValue > &map)
Definition: stringutils.h:164
std::vector< std::string > split(const std::string &s, char delim)
Definition: stringutils.cc:431
std::string replace_with_character(const std::string &string, const std::string &to_find, char to_replace)
Definition: stringutils.cc:301
void replace_all(std::string *string, const std::string &to_find, const std::string &to_replace)
Replace all occurrences in a string.
Definition: stringutils.cc:269
char to_upper_char(char b)
Definition: stringutils.cc:129
std::string trim_right(const std::string &string_to_trim, std::string_view trim_characters)
Remove characters from the right, stops at the first invalid character.
Definition: stringutils.cc:65
int compare_string(const std::string &lhs, const std::string &rhs)
Definition: stringutils.cc:498
std::string to_upper(const std::string &str)
Definition: stringutils.cc:159
bool is_number(char b)
Definition: stringutils.cc:471
std::string get_string_or_empty(bool b, const std::string &str)
Definition: stringutils.cc:457
std::string::size_type find_first_index_of_mismatch(const std::string &lhs, const std::string &rhs)
Definition: stringutils.cc:244
std::pair< std::string, std::string > get_last_string(const std::string &str, char sep)
for hello.dog gets .dog
Definition: stringutils.cc:17
std::string strip(const std::string &str, const std::string &ch)
Definition: stringutils.cc:331
std::string trim(const std::string &string_to_trim, std::string_view trim_characters)
Remove characters from both the start and the end.
Definition: stringutils.cc:79
char to_lower_char(char b)
Definition: stringutils.cc:116
std::string strip_last_string(const std::string &str, char sep)
for hello.dog and . gets hello
Definition: stringutils.cc:52
bool begins_with(const std::string &string_to_test, const std::string &start)
Tests if a string starts with another string.
Definition: stringutils.cc:87
Definition: assert.h:90
int parse_number(const char **aa)
Definition: stringutils.cc:478
bool ends_with(const std::string &str, char c)
Definition: os.cc:139
std::string to_string(const Aabb &a)
Definition: aabb.cc:110
size2f min(const size2f lhs, const size2f rhs)
Definition: size2.cc:140
int ch
Definition: nlp_sentence.cc:92
std::string buffer
Definition: nlp_sentence.cc:87
String utility functions.
std::string to_string()
Complete the builder and return the resulting string.
StringBuilder & add_char(char c)