libpqxx
The C++ client library for PostgreSQL
array-composite.hxx
1 #if !defined(PQXX_ARRAY_COMPOSITE_HXX)
2 # define PQXX_ARRAY_COMPOSITE_HXX
3 
4 # include <cassert>
5 
6 # include "pqxx/internal/encodings.hxx"
7 # include "pqxx/strconv.hxx"
8 
9 namespace pqxx::internal
10 {
11 // Find the end of a double-quoted string.
19 template<encoding_group ENC>
20 inline std::size_t scan_double_quoted_string(
21  char const input[], std::size_t size, std::size_t pos)
22 {
23  // TODO: find_char<'"', '\\'>().
24  using scanner = glyph_scanner<ENC>;
25  auto next{scanner::call(input, size, pos)};
26  PQXX_ASSUME(next > pos);
27  bool at_quote{false};
28  pos = next;
29  next = scanner::call(input, size, pos);
30  PQXX_ASSUME(next > pos);
31  while (pos < size)
32  {
33  if (at_quote)
34  {
35  if (next - pos == 1 and input[pos] == '"')
36  {
37  // We just read a pair of double quotes. Carry on.
38  at_quote = false;
39  }
40  else
41  {
42  // We just read one double quote, and now we're at a character that's
43  // not a second double quote. Ergo, that last character was the
44  // closing double quote and this is the position right after it.
45  return pos;
46  }
47  }
48  else if (next - pos == 1)
49  {
50  switch (input[pos])
51  {
52  case '\\':
53  // Backslash escape. Skip ahead by one more character.
54  pos = next;
55  next = scanner::call(input, size, pos);
56  PQXX_ASSUME(next > pos);
57  break;
58 
59  case '"':
60  // This is either the closing double quote, or the first of a pair of
61  // double quotes.
62  at_quote = true;
63  break;
64  }
65  }
66  else
67  {
68  // Multibyte character. Carry on.
69  }
70  pos = next;
71  next = scanner::call(input, size, pos);
72  PQXX_ASSUME(next > pos);
73  }
74  if (not at_quote)
75  throw argument_error{
76  "Missing closing double-quote: " + std::string{input}};
77  return pos;
78 }
79 
80 
81 // TODO: Needs version with caller-supplied buffer.
83 template<encoding_group ENC>
84 inline std::string parse_double_quoted_string(
85  char const input[], std::size_t end, std::size_t pos)
86 {
87  std::string output;
88  // Maximum output size is same as the input size, minus the opening and
89  // closing quotes. Or in the extreme opposite case, the real number could be
90  // half that. Usually it'll be a pretty close estimate.
91  output.reserve(std::size_t(end - pos - 2));
92 
93  // TODO: Use find_char<...>().
94  using scanner = glyph_scanner<ENC>;
95  auto here{scanner::call(input, end, pos)},
96  next{scanner::call(input, end, here)};
97  PQXX_ASSUME(here > pos);
98  PQXX_ASSUME(next > here);
99  while (here < end - 1)
100  {
101  // A backslash here is always an escape. So is a double-quote, since we're
102  // inside the double-quoted string. In either case, we can just ignore the
103  // escape character and use the next character. This is the one redeeming
104  // feature of SQL's escaping system.
105  if ((next - here == 1) and (input[here] == '\\' or input[here] == '"'))
106  {
107  // Skip escape.
108  here = next;
109  next = scanner::call(input, end, here);
110  PQXX_ASSUME(next > here);
111  }
112  output.append(input + here, input + next);
113  here = next;
114  next = scanner::call(input, end, here);
115  PQXX_ASSUME(next > here);
116  }
117  return output;
118 }
119 
120 
122 
129 template<pqxx::internal::encoding_group ENC, char... STOP>
130 inline std::size_t
131 scan_unquoted_string(char const input[], std::size_t size, std::size_t pos)
132 {
133  using scanner = glyph_scanner<ENC>;
134  auto next{scanner::call(input, size, pos)};
135  PQXX_ASSUME(next > pos);
136  while ((pos < size) and ((next - pos) > 1 or ((input[pos] != STOP) and ...)))
137  {
138  pos = next;
139  next = scanner::call(input, size, pos);
140  PQXX_ASSUME(next > pos);
141  }
142  return pos;
143 }
144 
145 
147 template<pqxx::internal::encoding_group ENC>
148 inline std::string_view
149 parse_unquoted_string(char const input[], std::size_t end, std::size_t pos)
150 {
151  return {&input[pos], end - pos};
152 }
153 
154 
156 
179 template<encoding_group ENC, typename T>
181  std::size_t &index, std::string_view input, std::size_t &pos, T &field,
182  std::size_t last_field)
183 {
184  assert(index <= last_field);
185  auto next{glyph_scanner<ENC>::call(std::data(input), std::size(input), pos)};
186  PQXX_ASSUME(next > pos);
187  if ((next - pos) != 1)
188  throw conversion_error{"Non-ASCII character in composite-type syntax."};
189 
190  // Expect a field.
191  switch (input[pos])
192  {
193  case ',':
194  case ')':
195  case ']':
196  // The field is empty, i.e, null.
197  if constexpr (nullness<T>::has_null)
198  field = nullness<T>::null();
199  else
200  throw conversion_error{
201  "Can't read composite field " + to_string(index) + ": C++ type " +
202  type_name<T> + " does not support nulls."};
203  break;
204 
205  case '"': {
206  auto const stop{
207  scan_double_quoted_string<ENC>(std::data(input), std::size(input), pos)};
208  PQXX_ASSUME(stop > pos);
209  auto const text{
210  parse_double_quoted_string<ENC>(std::data(input), stop, pos)};
211  field = from_string<T>(text);
212  pos = stop;
213  }
214  break;
215 
216  default: {
217  auto const stop{scan_unquoted_string<ENC, ',', ')', ']'>(
218  std::data(input), std::size(input), pos)};
219  PQXX_ASSUME(stop >= pos);
220  field =
221  from_string<T>(std::string_view{std::data(input) + pos, stop - pos});
222  pos = stop;
223  }
224  break;
225  }
226 
227  // Expect a comma or a closing parenthesis.
228  next = glyph_scanner<ENC>::call(std::data(input), std::size(input), pos);
229  PQXX_ASSUME(next > pos);
230 
231  if ((next - pos) != 1)
232  throw conversion_error{
233  "Unexpected non-ASCII character after composite field: " +
234  std::string{input}};
235 
236  if (index < last_field)
237  {
238  if (input[pos] != ',')
239  throw conversion_error{
240  "Found '" + std::string{input[pos]} +
241  "' in composite value where comma was expected: " + std::data(input)};
242  }
243  else
244  {
245  if (input[pos] == ',')
246  throw conversion_error{
247  "Composite value contained more fields than the expected " +
248  to_string(last_field) + ": " + std::data(input)};
249  if (input[pos] != ')' and input[pos] != ']')
250  throw conversion_error{
251  "Composite value has unexpected characters where closing parenthesis "
252  "was expected: " +
253  std::string{input}};
254  if (next != std::size(input))
255  throw conversion_error{
256  "Composite value has unexpected text after closing parenthesis: " +
257  std::string{input}};
258  }
259 
260  pos = next;
261  ++index;
262 }
263 
264 
/// Pointer to an encoding-specific specialisation of parse_composite_field.
/** The parameters are those of parse_composite_field: the running field
 * index, the text of the composite value, the current offset in that text, the
 * destination for the parsed value, and the index of the final field.
 */
template<typename T>
using composite_field_parser = void (*)(
  std::size_t &index,
  std::string_view input,
  std::size_t &pos,
  T &field,
  std::size_t last_field);
270 
271 
273 template<typename T>
275 {
276  switch (enc)
277  {
278  case encoding_group::MONOBYTE:
279  return parse_composite_field<encoding_group::MONOBYTE>;
280  case encoding_group::BIG5:
281  return parse_composite_field<encoding_group::BIG5>;
282  case encoding_group::EUC_CN:
283  return parse_composite_field<encoding_group::EUC_CN>;
284  case encoding_group::EUC_JP:
285  return parse_composite_field<encoding_group::EUC_JP>;
286  case encoding_group::EUC_KR:
287  return parse_composite_field<encoding_group::EUC_KR>;
288  case encoding_group::EUC_TW:
289  return parse_composite_field<encoding_group::EUC_TW>;
290  case encoding_group::GB18030:
291  return parse_composite_field<encoding_group::GB18030>;
292  case encoding_group::GBK: return parse_composite_field<encoding_group::GBK>;
293  case encoding_group::JOHAB:
294  return parse_composite_field<encoding_group::JOHAB>;
295  case encoding_group::MULE_INTERNAL:
296  return parse_composite_field<encoding_group::MULE_INTERNAL>;
297  case encoding_group::SJIS:
298  return parse_composite_field<encoding_group::SJIS>;
299  case encoding_group::UHC: return parse_composite_field<encoding_group::UHC>;
300  case encoding_group::UTF8:
301  return parse_composite_field<encoding_group::UTF8>;
302  }
303  throw internal_error{concat("Unexpected encoding group code: ", enc, ".")};
304 }
305 
306 
308 template<typename T>
309 inline std::size_t size_composite_field_buffer(T const &field)
310 {
311  if constexpr (is_unquoted_safe<T>)
312  {
313  // Safe to copy, without quotes or escaping. Drop the terminating zero.
314  return size_buffer(field) - 1;
315  }
316  else
317  {
318  // + Opening quote.
319  // + Field budget.
320  // - Terminating zero.
321  // + Escaping for each byte in the field's string representation.
322  // - Escaping for terminating zero.
323  // + Closing quote.
324  return 1 + 2 * (size_buffer(field) - 1) + 1;
325  }
326 }
327 
328 
329 template<typename T>
330 inline void write_composite_field(char *&pos, char *end, T const &field)
331 {
332  if constexpr (is_unquoted_safe<T>)
333  {
334  // No need for quoting or escaping. Convert it straight into its final
335  // place in the buffer, and "backspace" the trailing zero.
336  pos = string_traits<T>::into_buf(pos, end, field) - 1;
337  }
338  else
339  {
340  // The field may need escaping, which means we need an intermediate buffer.
341  // To avoid allocating that at run time, we use the end of the buffer that
342  // we have.
343  auto const budget{size_buffer(field)};
344  *pos++ = '"';
345 
346  // Now escape buf into its final position.
347  for (char const c : string_traits<T>::to_buf(end - budget, end, field))
348  {
349  if ((c == '"') or (c == '\\'))
350  *pos++ = '\\';
351 
352  *pos++ = c;
353  }
354 
355  *pos++ = '"';
356  }
357 
358  *pos++ = ',';
359 }
360 } // namespace pqxx::internal
361 #endif
static PQXX_PURE std::size_t call(char const buffer[], std::size_t buffer_len, std::size_t start)
Find the next glyph in buffer after position start.
std::string concat(TYPE...item)
Efficiently combine a bunch of items into one big string.
Definition: concat.hxx:31
std::size_t scan_double_quoted_string(char const input[], std::size_t size, std::size_t pos)
Definition: array-composite.hxx:20
std::size_t scan_unquoted_string(char const input[], std::size_t size, std::size_t pos)
Find the end of an unquoted string in an array or composite-type value.
Definition: array-composite.hxx:131
Internal items for libpqxx's own use. Do not use these yourself.
Definition: encodings.cxx:32
std::size_t size_buffer(TYPE const &...value) noexcept
Estimate how much buffer space is needed to represent values as a string.
Definition: strconv.hxx:526
composite_field_parser< T > specialize_parse_composite_field(encoding_group enc)
Look up implementation of parse_composite_field for ENC.
Definition: array-composite.hxx:274
std::vector< std::string_view > to_buf(char *here, char const *end, TYPE...value)
Convert multiple values to strings inside a single buffer.
Definition: strconv.hxx:493
std::string parse_double_quoted_string(char const input[], std::size_t end, std::size_t pos)
Un-quote and un-escape a double-quoted SQL string.
Definition: array-composite.hxx:84
PQXX_LIBEXPORT std::string to_string(field const &value)
Convert a field to a string.
Wrapper struct template for "find next glyph" functions.
Definition: encodings.hxx:142
std::string_view parse_unquoted_string(char const input[], std::size_t end, std::size_t pos)
Parse an unquoted array entry, or an unquoted field of a composite-type value.
Definition: array-composite.hxx:149
void(*)(std::size_t &index, std::string_view input, std::size_t &pos, T &field, std::size_t last_field) composite_field_parser
Pointer to an encoding-specific specialisation of parse_composite_field.
Definition: array-composite.hxx:269
void parse_composite_field(std::size_t &index, std::string_view input, std::size_t &pos, T &field, std::size_t last_field)
Parse a field of a composite-type value.
Definition: array-composite.hxx:180
Value conversion failed, e.g. when converting "Hello" to int.
Definition: except.hxx:282
static TYPE null()
Return a null value.
Reference to a field in a result set.
Definition: field.hxx:34
Invalid argument passed to libpqxx, similar to std::invalid_argument.
Definition: except.hxx:265
Traits describing a type's "null value," if any.
Definition: strconv.hxx:90
Internal error in libpqxx library.
Definition: except.hxx:241
static char * into_buf(char *begin, char *end, TYPE const &value)
Write value's string representation into buffer at begin.
std::size_t size_composite_field_buffer(T const &field)
Conservatively estimate buffer size needed for a composite field.
Definition: array-composite.hxx:309