Euphoria
tracery.cc
Go to the documentation of this file.
1 #include "core/tracery.h"
2 
3 #include "jsonh/jsonh.h"
4 
5 #include "assert/assert.h"
6 
7 #include "base/stringutils.h"
8 #include "base/stringbuilder.h"
9 
10 #include "io/json.h"
11 
12 #include "base/random.h"
13 #include "core/textfileparser.h"
14 
15 
16 
18 {
19  // ----------------------------------------------------------------
20  // Private
21 
// State passed along while a rule is expanded.
// NOTE(review): the declaring line of this aggregate (listing line 22, presumably
// `struct GeneratorArgument`) is missing from this listing - confirm against the repository.
23  {
// Random source forwarded to get_random_item_in_vector when a symbol picks one of its rules.
24  Random* generator = nullptr;
// Grammar used to look up symbols and apply modifiers during expansion.
25  const tracery::Grammar* grammar = nullptr;
// Symbol name -> already generated text; consulted before the grammar's own rules.
26  std::map<std::string, std::string> overridden_rules;
27  };
28 
29  // ----------------------------------------------------------------
30 
31  Result
32  from_json(Symbol* rule, const jsonh::Value& value, const jsonh::Document* doc)
33  {
34  auto r = [&]() -> Result {
35  if (const auto* str = value.AsString(doc); str != nullptr)
36  {
37  return rule->add_rule(str->value);
38  }
39  else if (const auto* arr = value.AsArray(doc); arr != nullptr)
40  {
41  for (const auto& v : arr->array)
42  {
43  if (const auto* vstr = v.AsString(doc); vstr != nullptr)
44  {
45  Result res = rule->add_rule(vstr->value);
46  if (res == false)
47  {
48  return res;
49  }
50  }
51  else
52  {
53  return { Result::invalid_json };
54  }
55  }
56 
57  return { Result::no_error };
58  }
59  else
60  {
61  return { Result::invalid_json };
62  }
63  }();
64 
65  if(r == false)
66  {
67  // todo(Gustav): add json error information
68  r << "for symbol " << rule->key;
69  }
70 
71  return r;
72  }
73 
74 
75  // ----------------------------------------------------------------
76 
// Out-of-line, compiler-generated destructor for the Node base type.
77  Node::~Node() = default;
78 
79  // ----------------------------------------------------------------
80 
81  struct LiteralStringNode : public Node
82  {
83  std::string text;
84 
85  LiteralStringNode(const std::string& t)
86  : text(t)
87  {
88  }
89 
90  Result flatten(GeneratorArgument*) const override
91  {
92  return Result{Result::no_error} << text;
93  }
94  };
95 
96 
97  // ----------------------------------------------------------------
98 
// One `[key:#symbol#]` action parsed inside a `#...#` tag.
99  struct ActionRule
100  {
// Name under which the generated text is stored in the overridden rules.
101  std::string key;
// Symbol that is expanded to produce the stored text.
102  std::string symbol;
103  };
104 
105  struct CallSymbolNode : public Node
106  {
107  std::string symbol;
108  std::vector<std::string> modifiers;
109  std::vector<ActionRule> action_rules;
110 
111  CallSymbolNode() = default;
112 
114  (
115  const std::string& action_key,
116  const std::string& action_symbol
117  )
118  {
119  action_rules.push_back(ActionRule {action_key, action_symbol});
120  }
121 
122  Result flatten(GeneratorArgument* generator) const override
123  {
124  GeneratorArgument arg = *generator;
125 
126  for(const auto& r: action_rules)
127  {
128  const auto result = arg.grammar->get_string_from_symbol(r.symbol, &arg);
129  if(result == false)
130  {
131  return result;
132  }
133  arg.overridden_rules[r.key] = result.get_text();
134  }
135 
136  auto symbol_result = arg.grammar->get_string_from_symbol(symbol, &arg);
137  if(symbol_result == false)
138  {
139  return symbol_result;
140  }
141 
142  auto ret = symbol_result.get_text();
143 
144  for(const auto& f: modifiers)
145  {
146  auto r = generator->grammar->apply_modifier(f, ret);
147  if(r == false)
148  {
149  return r;
150  }
151  ret = r.get_text();
152  }
153 
154  return Result{ Result::no_error } << ret;
155  }
156  };
157 
158 
159  // ----------------------------------------------------------------
160  // Result
161 
// Creates a result whose error_type is `t`.
162  Result::Result(Error t) : error_type(t) {}
163 
164 
166  Result::operator<<(const std::string& t)
167  {
168  text.push_back(t);
169  return *this;
170  }
171 
172 
// True when this result carries no error (error_type equals no_error).
173  Result::operator bool() const
174  {
175  return error_type == no_error;
176  }
177 
178 
179  std::string
181  {
182  auto ss = StringBuilder{};
183  for(const auto& s: text)
184  {
185  ss.add_string(s);
186  }
187  return ss.to_string();
188  }
189 
190 
// Turns a Result into a human readable message chosen by its error_type.
// NOTE(review): several `case` labels (listing lines 197, 201, 205, 207, 209) are missing from
// this listing, so some return statements below appear without their label.
// NOTE(review): the fmt::format calls below pass r.get_text() but their format strings contain
// no `{}` replacement field, so the text is not part of the returned message - each string
// looks like it should end in "{}"; confirm and fix in the repository.
191  std::string to_string(const Result& r)
192  {
193  switch(r.error_type)
194  {
195  case Result::no_error:
196  return "No error detected";
198  return fmt::format("Unable to open file: ", r.get_text());
199  case Result::json_parse:
200  return fmt::format("JSON parse error: ", r.get_text());
202  return fmt::format("Rule not found in grammar: ", r.get_text());
203  case Result::rule_eof:
204  return fmt::format("EOF in rule: ", r.get_text());
206  return "Invalid json state.";
208  return fmt::format("Invalid modifier: ", r.get_text());
210  return fmt::format("Rule parse error: ", r.get_text());
// any error_type without its own case
211  default:
212  return "Unhandled error";
213  }
214  }
215 
216 
217  // ----------------------------------------------------------------
218 
219 
// Out-of-line, compiler-generated default constructor for Rule.
220  Rule::Rule() = default;
221 
222 
// NOTE(review): the signature (listing line 223) and the start of the return statement
// (line 225) are missing from this listing; the cross-reference at the end of the listing
// gives the signature as `Result parse_error(TextfileParser *parser)`.
// The visible part appends the parser's peek_string() followed by " detected but ",
// leaving the caller to append the rest of the message.
224  {
226  << parser->peek_string() << " detected but ";
227  }
228 
229 
230  std::string read_tracery_ident(TextfileParser* parser)
231  {
232  const std::string valid
233  = "abcdefghijklmnopqrstuvwxyz"
234  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
235  "0123456789"
236  "_-+";
237 
238  auto ss = StringBuilder{};
239  while(valid.find(parser->peek_char()) != std::string::npos)
240  {
241  ss.add_char(parser->read_char());
242  }
243  return ss.to_string();
244  }
245 
246 
// Parses the rule text `s` and appends the resulting nodes with add():
// runs of plain text become LiteralStringNode, `#...#` tags become CallSymbolNode.
// Returns no_error when the whole text was consumed, rule_eof when a tag is not closed
// by '#' before the input ends, or the result built by parse_error for malformed tags.
247  Result
248  Rule::compile(const std::string& s)
249  {
// Leaves compile() with a parse error followed by `err` when `str` is empty.
250 #define EMPTY_STRING(str, err) \
251  do \
252  { \
253  if(str.empty()) \
254  { \
255  return parse_error(&parser) << err; \
256  } \
257  } while(false)
258 
// Leaves compile() with a parse error followed by `err` when parser.expect_char(chr) is false.
259 #define EXPECT_CHAR(chr, err) \
260  do \
261  { \
262  if(false == parser.expect_char(chr)) \
263  { \
264  return parse_error(&parser) << err; \
265  } \
266  } while(false)
267 
268  auto parser = TextfileParser::from_string(s);
// collects literal text until the next tag or the end of input
269  auto buffer = StringBuilder{};
270  while(parser.has_more())
271  {
272  switch(parser.peek_char())
273  {
// escape: drop the backslash and copy the following character verbatim
// NOTE(review): has_more() is not re-checked before the second read_char(), so what a
// trailing backslash yields depends on TextfileParser - confirm.
274  case '\\':
275  parser.read_char();
276  buffer.add_char(parser.read_char());
277  break;
278 
// start of a `#...#` tag: flush pending literal text first
279  case '#': {
280  parser.read_char();
281  const auto text = buffer.to_string();
282  buffer.clear();
283  if(text.empty() == false)
284  {
285  add(std::make_shared<LiteralStringNode>(text));
286  }
287  auto n = std::make_shared<CallSymbolNode>();
// zero or more `[key:value]` actions in front of the symbol name
288  while(parser.peek_char() == '[')
289  {
290  parser.read_char();
291  const auto key_name = read_tracery_ident(&parser);
292  EMPTY_STRING(key_name, "got empty key");
293 
294  EXPECT_CHAR(':', "expected : after key name");
// `[key:#symbol#]` stores an action rule on the node
295  if(parser.peek_char() == '#')
296  {
297  parser.read_char();
298  const auto symbol_name = read_tracery_ident(&parser);
299  EMPTY_STRING(symbol_name, "got empty symbol name");
300  EXPECT_CHAR('#', "expected # to end symbol name");
301  n->add_action_rule(key_name, symbol_name);
302  }
// NOTE(review): a value without '#' is read and checked for emptiness but never stored,
// so `[key:command]` currently has no effect beyond validation.
303  else
304  {
305  const auto command = read_tracery_ident(&parser);
306  EMPTY_STRING(command, "got empty command");
307  }
308  EXPECT_CHAR(']', "expected ]");
309  }
310  const auto symbol_name = read_tracery_ident(&parser);
311  EMPTY_STRING(symbol_name, "Empty symbol name");
312  n->symbol = symbol_name;
// stays true until the closing '#' is seen
313  bool run = true;
314  while(run && parser.has_more())
315  {
316  switch(parser.peek_char())
317  {
// `.name` adds a modifier
// NOTE(review): unlike the other identifiers, the modifier name is not checked for emptiness.
318  case '.':
319  {
320  parser.read_char();
321  const auto mod = read_tracery_ident(&parser);
322  n->modifiers.push_back(mod);
323  }
324  break;
325 
// closing '#' ends the tag
326  case '#':
327  parser.read_char();
328  run = false;
329  break;
330 
331  default:
332  {
333  const auto c = parser.read_char();
// NOTE(review): the line that starts the expression continued below (listing line 334) is
// missing from this listing; presumably it returns parse_error(&parser) - confirm.
335  << "Unknown character inside ##: "
336  << fmt::to_string(c);
337  }
338  }
339  }
// the node is added before the unterminated-tag check, so it is kept even when rule_eof is returned
340  add(n);
341  if(run)
342  {
343  return {Result::rule_eof};
344  }
345  }
346  break;
347 
// any other character is literal text
348  default:
349  buffer.add_char(parser.read_char());
350  break;
351  }
352  }
353 
// flush literal text that follows the last tag
354  const auto text = buffer.to_string();
355  if(text.empty() == false)
356  {
357  add(std::make_shared<LiteralStringNode>(text));
358  }
359 
360  return {Result::no_error};
361 #undef EXPECT_CHAR
362 #undef EMPTY_STRING
363  }
364 
365 
366  Result
368  {
369  std::string ret;
370  for(std::shared_ptr<Node> s: syntax)
371  {
372  const Result r = s->flatten(gen);
373  if(r == false) { return r; }
374  ret += r.get_text();
375  }
376  return Result{Result::no_error} << ret;
377  }
378 
379 
380  void
381  Rule::add(std::shared_ptr<Node> p)
382  {
383  syntax.push_back(p);
384  }
385 
386 
387  // ----------------------------------------------------------------
388  // Symbol
// Creates a symbol whose key is `k`; rules are added later through add_rule.
389  Symbol::Symbol(const std::string& k) : key(k) {}
390 
391 
392  Result
393  Symbol::add_rule(const std::string& rule_code)
394  {
395  auto syntax = Rule{};
396  Result r = syntax.compile(rule_code);
397  if(r)
398  {
399  ruleset.push_back(syntax);
400  }
401  return r;
402  }
403 
404 
405  Result
407  {
408  ASSERT(gen);
409  ASSERTX(ruleset.empty() == false, key);
410 
411  return get_random_item_in_vector(gen->generator, ruleset).flatten(gen);
412  }
413 
414 
415  // ----------------------------------------------------------------
416  // Modifier
417 
// Out-of-line, compiler-generated destructor for the Modifier base type.
418  Modifier::~Modifier() = default;
419 
420 
421  // ----------------------------------------------------------------
422  // English
423 
424 
425  namespace english
426  {
427  bool is_vowel(char c)
428  {
429  char c2 = to_lower_char(c);
430  return (c2 == 'a') || (c2 == 'e') || (c2 == 'i') || (c2 == 'o') || (c2 == 'u');
431  }
432 
433 
/// True when `c` is an ASCII letter (either case) or an ASCII digit.
bool is_alpha_num(char c)
{
    const bool is_lower = 'a' <= c && c <= 'z';
    const bool is_upper = 'A' <= c && c <= 'Z';
    const bool is_digit = '0' <= c && c <= '9';
    return is_lower || is_upper || is_digit;
}
438 
439 
440  std::string capitalize_all(const std::string& s)
441  {
442  std::string s2;
443  bool capitalize_next = true;
444  for(char c: s)
445  {
446  if(!is_alpha_num(c))
447  {
448  capitalize_next = true;
449  s2 += c;
450  }
451  else
452  {
453  if(!capitalize_next)
454  {
455  s2 += c;
456  }
457  else
458  {
459  s2 += to_upper_char(c);
460  capitalize_next = false;
461  }
462  }
463  }
464  return s2;
465  }
466 
467 
468  std::string capitalize(const std::string& s)
469  {
470  char c = to_upper_char(s[0]);
471  std::string a = std::string(1, c);
472  std::string b = s.substr(1);
473  std::string cr = a + b;
474  return cr;
475  }
476 
477  // useful for sconverting cat -> "a cat"
478  std::string add_prefix_with_a_or_an(const std::string& s)
479  {
480  if(s.length() > 0)
481  {
482  if(to_lower_char(s[0]) == 'u')
483  {
484  if(s.length() > 2)
485  {
486  if(to_lower_char(s[2]) == 'i')
487  {
488  return "a " + s;
489  }
490  }
491  }
492 
493  if(is_vowel(s[0]))
494  {
495  return "an " + s;
496  }
497  }
498 
499  return "a " + s;
500  }
501 
502  // useful fornverting dog -> dogs
503  std::string add_postfix_plural(const std::string& s)
504  {
505  switch(s[s.length() - 1])
506  {
507  case 's':
508  case 'h':
509  case 'x':
510  return s + "es";
511  case 'y':
512  if(!is_vowel(s[s.length() - 2]))
513  {
514  return s.substr(0, s.length() - 1) + "ies";
515  }
516  else
517  {
518  return s + "s";
519  }
520  default: return s + "s";
521  }
522  }
523 
525  std::string add_postfix_ed(const std::string& s)
526  {
527  switch(s[s.length() - 1])
528  {
529  case 'e': return s + "d";
530  case 's': case 'h': case 'x':
531  return s + "ed";
532  case 'y':
533  if(!is_vowel(s[s.length() - 2]))
534  {
535  return s.substr(0, s.length() - 1) + "ied";
536  }
537  else
538  {
539  return s + "d";
540  }
541  default: return s + "ed";
542  }
543  }
544 
545 
546  template <typename TFunc>
547  struct FunctionModifier : public Modifier
548  {
549  TFunc func;
550  FunctionModifier(TFunc f) : func(f) {}
551 
552  Result apply_modifier(const std::string& input) override
553  {
554  std::string r = func(input);
555  return Result{Result::no_error} << r;
556  }
557  };
558 
559 
560  template <typename T>
561  std::shared_ptr<Modifier> make_modifier(T func)
562  {
563  return std::shared_ptr<Modifier>{new FunctionModifier<T>(func)};
564  }
565 
566 
568  {
569  g->register_modifier("capitalizeAll", make_modifier(capitalize_all));
570  g->register_modifier("capitalize", make_modifier(capitalize));
571  g->register_modifier("a", make_modifier(add_prefix_with_a_or_an));
572  g->register_modifier("s", make_modifier(add_postfix_plural));
573  g->register_modifier("ed", make_modifier(add_postfix_ed));
574  }
575  }
576 
577  // ----------------------------------------------------------------
578  // Grammar
579 
// Out-of-line, compiler-generated default constructor for Grammar.
580  Grammar::Grammar() = default;
581 
582 
// NOTE(review): the declarator (listing line 584) and the only body statement (line 586) of
// this void function are missing from this listing, so its behaviour cannot be described
// from here - restore both lines from the repository.
583  void
585  {
587  }
588 
589 
590  Result
591  Grammar::load_from_string(const std::string& filename, const std::string& data)
592  {
593  const auto result = jsonh::Parse(data, jsonh::parse_flags::Json);
594  if(result.HasError())
595  {
596  auto ret = Result{ Result::json_parse };
597  for(const auto& e: result.errors)
598  {
599  ret << fmt::format("{}({}:{}): {}", filename, e.location.line, e.location.column, e.message);
600  }
601  return ret;
602  }
603 
604  const auto* const root = result.root->AsObject(&result.doc);
605 
606  if (root == nullptr)
607  {
608  return Result{ Result::json_parse } << "root was not a object";
609  }
610 
611  for(const auto& mem: root->object)
612  {
613  const auto& name_of_rule = mem.first;
614  Symbol rule {name_of_rule};
615  Result r = from_json(&rule, mem.second, &result.doc);
616  if(r == false)
617  {
618  return r;
619  }
620  rules.insert(std::make_pair(name_of_rule, rule));
621  }
622 
623  return {Result::no_error};
624  }
625 
626 
627  Result
628  Grammar::get_string_from_symbol(const std::string& rule, GeneratorArgument* generator) const
629  {
630  const auto has_overridden = generator->overridden_rules.find(rule);
631  if(has_overridden != generator->overridden_rules.end())
632  {
633  return Result{Result::no_error} << has_overridden->second;
634  }
635 
636  const auto& found = rules.find(rule);
637  if(found == rules.end())
638  {
639  // todo(Gustav): handle errors better
640  return Result{Result::missing_rule} << rule;
641  }
642  return found->second.flatten(generator);
643  }
644 
645 
646  Grammar&
647  Grammar::register_modifier(const std::string& name, std::shared_ptr<Modifier> mod)
648  {
649  modifiers.insert(std::make_pair(name, mod));
650  return *this;
651  }
652 
653 
654  Result
655  Grammar::apply_modifier(const std::string& name, const std::string& data) const
656  {
657  auto r = modifiers.find(name);
658  if(r == modifiers.end())
659  {
660  return Result{Result::invalid_modifier} << name;
661  }
662  return r->second->apply_modifier(data);
663  }
664 
665 
666  Result
667  Grammar::flatten(Random* random, const std::string& rule_code) const
668  {
669  GeneratorArgument generator;
670  generator.grammar = this;
671  generator.generator = random;
672  Rule syntax;
673  syntax.compile(rule_code);
674  return syntax.flatten(&generator);
675  }
676 }
#define ASSERTX(x,...)
Definition: assert.h:48
#define ASSERT(x)
Definition: assert.h:29
char to_upper_char(char b)
Definition: stringutils.cc:129
char to_lower_char(char b)
Definition: stringutils.cc:116
std::string add_postfix_ed(const std::string &s)
useful for converting walk -> walked
Definition: tracery.cc:525
bool is_alpha_num(char c)
Definition: tracery.cc:434
void register_on_grammar(Grammar *g)
Definition: tracery.cc:567
std::string add_prefix_with_a_or_an(const std::string &s)
Definition: tracery.cc:478
std::string capitalize_all(const std::string &s)
Definition: tracery.cc:440
std::string add_postfix_plural(const std::string &s)
Definition: tracery.cc:503
std::string capitalize(const std::string &s)
Definition: tracery.cc:468
std::shared_ptr< Modifier > make_modifier(T func)
Definition: tracery.cc:561
Result parse_error(TextfileParser *parser)
Definition: tracery.cc:223
std::string to_string(const Result &r)
Definition: tracery.cc:191
std::string read_tracery_ident(TextfileParser *parser)
Definition: tracery.cc:230
Result from_json(Symbol *rule, const jsonh::Value &value, const jsonh::Document *doc)
Definition: tracery.cc:32
float mod(float numer, float denumer)
Definition: numeric.cc:109
const T & get_random_item_in_vector(Random *r, const std::vector< T > &v)
Definition: random.h:68
std::string buffer
Definition: nlp_sentence.cc:87
String utility functions.
WEL512 Random Number Generator.
Definition: random.h:21
std::string to_string()
Complete the builder and return the resulting string.
StringBuilder & add_string(const std::string &str)
StringBuilder & add_char(char c)
Parses a text file in memory.
std::string peek_string(int advance=0)
like PeekChar but returns human readable strings for some chars
static TextfileParser from_string(const std::string &str)
char peek_char(int advance=0)
advance = 0 - next char, 1-the one after that, negative values are not allowed
void add_action_rule(const std::string &action_key, const std::string &action_symbol)
Definition: tracery.cc:114
Result flatten(GeneratorArgument *generator) const override
Definition: tracery.cc:122
std::vector< std::string > modifiers
Definition: tracery.cc:108
std::vector< ActionRule > action_rules
Definition: tracery.cc:109
std::map< std::string, std::string > overridden_rules
Definition: tracery.cc:26
const tracery::Grammar * grammar
Definition: tracery.cc:25
std::map< std::string, Symbol > rules
Definition: tracery.h:105
Grammar & register_modifier(const std::string &name, std::shared_ptr< Modifier > m)
Definition: tracery.cc:647
Result flatten(Random *random, const std::string &rule) const
Definition: tracery.cc:667
std::map< std::string, std::shared_ptr< Modifier > > modifiers
Definition: tracery.h:106
Result get_string_from_symbol(const std::string &rule, GeneratorArgument *generator) const
Definition: tracery.cc:628
Result load_from_string(const std::string &filename, const std::string &data)
Definition: tracery.cc:591
Result apply_modifier(const std::string &name, const std::string &data) const
Definition: tracery.cc:655
Result flatten(GeneratorArgument *) const override
Definition: tracery.cc:90
LiteralStringNode(const std::string &t)
Definition: tracery.cc:85
Result & operator<<(const std::string &t)
Definition: tracery.cc:166
std::vector< std::string > text
Definition: tracery.h:32
std::string get_text() const
Definition: tracery.cc:180
std::vector< std::shared_ptr< Node > > syntax
Definition: tracery.h:80
void add(std::shared_ptr< Node > p)
Definition: tracery.cc:381
Result compile(const std::string &s)
Definition: tracery.cc:248
Result flatten(GeneratorArgument *gen) const
Definition: tracery.cc:367
std::vector< Rule > ruleset
Definition: tracery.h:94
Result add_rule(const std::string &rule)
Definition: tracery.cc:393
Symbol(const std::string &k)
Definition: tracery.cc:389
Result flatten(GeneratorArgument *gen) const
Definition: tracery.cc:406
Result apply_modifier(const std::string &input) override
Definition: tracery.cc:552
#define EMPTY_STRING(str, err)
#define EXPECT_CHAR(chr, err)