Euphoria
editdistance.cc
Go to the documentation of this file.
1 #include "base/editdistance.h"
2 
3 #include <algorithm>
4 #include <memory>
5 
6 #include "assert/assert.h"
7 
8 #include "base/cint.h"
9 
10 
11 namespace eu
12 {
13  int
14  calc_edit_distance(const std::string& source, const std::string& target)
15  {
16  const int source_length = c_sizet_to_int(source.length());
17  const int target_length = c_sizet_to_int(target.length());
18  if(source_length == 0)
19  {
20  return target_length;
21  }
22  if(target_length == 0)
23  {
24  return source_length;
25  }
26 
27  using Tmatrix = std::vector<std::vector<int>>;
28 
29  Tmatrix matrix(source_length + 1);
30 
31  for(int source_index = 0; source_index <= source_length; source_index++)
32  {
33  matrix[source_index].resize(target_length + 1);
34  }
35 
36  for(int source_index = 0; source_index <= source_length; source_index++)
37  {
38  matrix[source_index][0] = source_index;
39  }
40 
41  for(int target_index = 0; target_index <= target_length; target_index++)
42  {
43  matrix[0][target_index] = target_index;
44  }
45 
46  for(int source_index = 1; source_index <= source_length; source_index++)
47  {
48  const char source_char = source[source_index - 1];
49 
50  for(int target_index = 1; target_index <= target_length; target_index++)
51  {
52  const char target_char = target[target_index - 1];
53 
54  const int cost = (source_char == target_char) ? 0 : 1;
55 
56  const int above = matrix[source_index - 1][target_index];
57  const int left = matrix[source_index][target_index - 1];
58  const int diag = matrix[source_index - 1][target_index - 1];
59  int cell = std::min(above + 1, std::min(left + 1, diag + cost));
60 
61  if(source_index > 2 && target_index > 2)
62  {
63  int trans = matrix[source_index - 2][target_index - 2] + 1;
64  if(source[source_index - 2] != target_char)
65  {
66  trans++;
67  }
68  if(source_char != target[target_index - 2])
69  {
70  trans++;
71  }
72  if(cell > trans)
73  {
74  cell = trans;
75  }
76  }
77 
78  matrix[source_index][target_index] = cell;
79  }
80  }
81 
82  return matrix[source_length][target_length];
83  }
84 
85  int
86  calc_edit_distance_fast(const std::string& the_row, const std::string& the_col)
87  {
88  const int row_length = c_sizet_to_int(the_row.length());
89  const int col_length = c_sizet_to_int(the_col.length());
90 
91  if(row_length == 0)
92  {
93  return col_length;
94  }
95 
96  if(col_length == 0)
97  {
98  return row_length;
99  }
100 
101  std::unique_ptr<int[]> v0 {new int[row_length + 1]};
102  std::unique_ptr<int[]> v1 {new int[row_length + 1]};
103 
104  for(int row_index = 0; row_index <= row_length; row_index++)
105  {
106  v0[row_index] = row_index;
107  v1[row_index] = 0;
108  }
109 
110  for(int col_index = 1; col_index <= col_length; col_index++)
111  {
112  v1[0] = col_index;
113 
114  for(int row_index = 1; row_index <= row_length; row_index++)
115  {
116  const int cost
117  = the_row[row_index - 1] == the_col[col_index - 1] ? 0 : 1;
118 
119  const int m_min = v0[row_index] + 1;
120  const int b = v1[row_index - 1] + 1;
121  const int c = v0[row_index - 1] + cost;
122 
123  v1[row_index] = std::min(std::min(m_min, b), c);
124  }
125 
126  v0.swap(v1);
127  }
128 
129  return v0[row_length];
130  }
131 
132 }
Definition: assert.h:90
int c_sizet_to_int(size_t t)
Definition: cint.cc:11
size2f min(const size2f lhs, const size2f rhs)
Definition: size2.cc:140
int calc_edit_distance_fast(const std::string &the_row, const std::string &the_col)
Definition: editdistance.cc:86
int calc_edit_distance(const std::string &source, const std::string &target)
Definition: editdistance.cc:14