libpqxx
The C++ client library for PostgreSQL
array.hxx
1 /* Handling of SQL arrays.
2  *
3  * DO NOT INCLUDE THIS FILE DIRECTLY; include pqxx/field instead.
4  *
5  * Copyright (c) 2000-2025, Jeroen T. Vermeulen.
6  *
7  * See COPYING for copyright license. If you did not receive a file called
8  * COPYING with this source code, please notify the distributor of this
9  * mistake, or contact the author.
10  */
11 #ifndef PQXX_H_ARRAY
12 #define PQXX_H_ARRAY
13 
14 #if !defined(PQXX_HEADER_PRE)
15 # error "Include libpqxx headers as <pqxx/header>, not <pqxx/header.hxx>."
16 #endif
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <stdexcept>
21 #include <string>
22 #include <type_traits>
23 #include <utility>
24 #include <vector>
25 
26 #include "pqxx/connection.hxx"
27 #include "pqxx/internal/array-composite.hxx"
28 #include "pqxx/internal/encoding_group.hxx"
29 #include "pqxx/internal/encodings.hxx"
30 
31 
32 namespace pqxx
33 {
34 // TODO: Specialise for string_view/zview, allocate all strings in one buffer.
35 
37 
52 template<
53  typename ELEMENT, std::size_t DIMENSIONS = 1u,
54  char SEPARATOR = array_separator<ELEMENT>>
55 class array final
56 {
57 public:
59 
68  array(std::string_view data, connection const &cx) :
70  {}
71 
73 
75  constexpr std::size_t dimensions() noexcept { return DIMENSIONS; }
76 
78 
82  std::array<std::size_t, DIMENSIONS> const &sizes() noexcept
83  {
84  return m_extents;
85  }
86 
87  template<typename... INDEX> ELEMENT const &at(INDEX... index) const
88  {
89  static_assert(sizeof...(index) == DIMENSIONS);
90  check_bounds(index...);
91  return m_elts.at(locate(index...));
92  }
93 
95 
103  template<typename... INDEX> ELEMENT const &operator[](INDEX... index) const
104  {
105  static_assert(sizeof...(index) == DIMENSIONS);
106  return m_elts[locate(index...)];
107  }
108 
110 
115  constexpr auto cbegin() const noexcept { return m_elts.cbegin(); }
117  constexpr auto cend() const noexcept { return m_elts.cend(); }
119  constexpr auto crbegin() const noexcept { return m_elts.crbegin(); }
121  constexpr auto crend() const noexcept { return m_elts.crend(); }
122 
124 
127  constexpr std::size_t size() const noexcept { return m_elts.size(); }
128 
130 
145  constexpr auto ssize() const noexcept
146  {
147  return static_cast<std::ptrdiff_t>(size());
148  }
149 
151 
153  constexpr auto front() const noexcept { return m_elts.front(); }
154 
156 
158  constexpr auto back() const noexcept { return m_elts.back(); }
159 
160 private:
162 
170  void check_dims(std::string_view data)
171  {
172  auto sz{std::size(data)};
173  if (sz < DIMENSIONS * 2)
174  throw conversion_error{pqxx::internal::concat(
175  "Trying to parse a ", DIMENSIONS, "-dimensional array out of '", data,
176  "'.")};
177 
178  // Making some assumptions here:
179  // * The array holds no extraneous whitespace.
180  // * None of the sub-arrays can be null.
181  // * Only ASCII characters start off with a byte in the 0-127 range.
182  //
183  // Given those, the input must start with a sequence of DIMENSIONS bytes
184  // with the ASCII value for '{'; and likewise it must end with a sequence
185  // of DIMENSIONS bytes with the ASCII value for '}'.
186 
187  if (data[0] != '{')
188  throw conversion_error{"Malformed array: does not start with '{'."};
189  for (std::size_t i{0}; i < DIMENSIONS; ++i)
190  if (data[i] != '{')
191  throw conversion_error{pqxx::internal::concat(
192  "Expecting ", DIMENSIONS, "-dimensional array, but found ", i, ".")};
193  if (data[DIMENSIONS] == '{')
194  throw conversion_error{pqxx::internal::concat(
195  "Tried to parse ", DIMENSIONS,
196  "-dimensional array from array data that has more dimensions.")};
197  for (std::size_t i{0}; i < DIMENSIONS; ++i)
198  if (data[sz - 1 - i] != '}')
199  throw conversion_error{
200  "Malformed array: does not end in the right number of '}'."};
201  }
202 
203  // Allow fields to construct arrays passing the encoding group.
204  // Couldn't make this work through a call gate, thanks to the templating.
205  friend class ::pqxx::field;
206 
  /// Parse `data` as an SQL array, given the encoding group of the text.
  /** The encoding group is only known at run time, but the parser is a
   * template on the encoding group.  So this switch maps each run-time value
   * to the matching `parse<...>()` instantiation.
   *
   * @param data Text representation of the SQL array.
   * @param enc Encoding group that `data` is encoded in.
   */
  array(std::string_view data, pqxx::internal::encoding_group enc)
  {
    using group = pqxx::internal::encoding_group;
    switch (enc)
    {
    case group::MONOBYTE: parse<group::MONOBYTE>(data); break;
    case group::BIG5: parse<group::BIG5>(data); break;
    case group::EUC_CN: parse<group::EUC_CN>(data); break;
    case group::EUC_JP: parse<group::EUC_JP>(data); break;
    case group::EUC_KR: parse<group::EUC_KR>(data); break;
    case group::EUC_TW: parse<group::EUC_TW>(data); break;
    case group::GB18030: parse<group::GB18030>(data); break;
    case group::GBK: parse<group::GBK>(data); break;
    case group::JOHAB: parse<group::JOHAB>(data); break;
    case group::MULE_INTERNAL: parse<group::MULE_INTERNAL>(data); break;
    case group::SJIS: parse<group::SJIS>(data); break;
    case group::UHC: parse<group::UHC>(data); break;
    case group::UTF8: parse<group::UTF8>(data); break;
    // Any other value is treated as impossible.
    default: PQXX_UNREACHABLE; break;
    }
  }
228 
230 
233  std::size_t parse_field_end(std::string_view data, std::size_t here) const
234  {
235  auto const sz{std::size(data)};
236  if (here < sz)
237  switch (data[here])
238  {
239  case SEPARATOR:
240  ++here;
241  if (here >= sz)
242  throw conversion_error{"Array looks truncated."};
243  switch (data[here])
244  {
245  case SEPARATOR:
246  throw conversion_error{"Array contains double separator."};
247  case '}': throw conversion_error{"Array contains trailing separator."};
248  default: break;
249  }
250  break;
251  case '}': break;
252  default:
253  throw conversion_error{pqxx::internal::concat(
254  "Unexpected character in array: ",
255  static_cast<unsigned>(static_cast<unsigned char>(data[here])),
256  " where separator or closing brace expected.")};
257  }
258  return here;
259  }
260 
262 
267  constexpr std::size_t estimate_elements(std::string_view data) const noexcept
268  {
269  // Dirty trick: just count the number of bytes that look as if they may be
270  // separators. At the very worst we may overestimate by a factor of two or
271  // so, in exceedingly rare cases, on some encodings.
272  auto const separators{
273  std::count(std::begin(data), std::end(data), SEPARATOR)};
274  // The number of dimensions makes no difference here. It's still one
275  // separator between consecutive elements, just possibly with some extra
276  // braces as well.
277  return static_cast<std::size_t>(separators + 1);
278  }
279 
  /// Parse the text of an SQL array into this object.
  /** Fills `m_elts` with the converted elements in the order in which they
   * occur in `data`, records the extent of each dimension in `m_extents`, and
   * finally calls init_factors() to set up `m_factors`.
   *
   * @tparam ENC Encoding group of `data`.  It is passed on to the string
   * scanning/parsing helpers.
   * @param data Text representation of the SQL array.
   *
   * @throws conversion_error if the text is malformed: wrong or inconsistent
   * number of dimensions, rows of unequal sizes, stray braces, empty fields,
   * zero bytes, or truncated input.
   * @throws unexpected_null if the text contains an unquoted `NULL` and
   * `nullness<ELEMENT>::has_null` is false.
   */
  template<pqxx::internal::encoding_group ENC>
  void parse(std::string_view data)
  {
    static_assert(DIMENSIONS > 0u, "Can't create a zero-dimensional array.");
    auto const sz{std::size(data)};
    check_dims(data);

    m_elts.reserve(estimate_elements(data));

    // We discover the array's extents along each of the dimensions, starting
    // with the final dimension and working our way towards the first.  At any
    // given point during parsing, we know the extents starting at this
    // dimension.
    std::size_t know_extents_from{DIMENSIONS};

    // Currently parsing this dimension.  We start off at -1, relying on C++'s
    // well-defined rollover for unsigned numbers.
    // The actual outermost dimension of the array is 0, and the innermost is
    // at the end.  But, the array as a whole is enclosed in braces just like
    // each row.  So we act like there's an anomalous "outer" dimension holding
    // the entire array.
    constexpr std::size_t outer{std::size_t{0u} - std::size_t{1u}};

    // We start parsing at the fictional outer dimension.  The input begins
    // with opening braces, one for each dimension, so we'll start off by
    // bumping all the way to the innermost dimension.
    std::size_t dim{outer};

    // Extent counters, one per "real" dimension.
    // Note initialiser syntax; this zero-initialises all elements.
    std::array<std::size_t, DIMENSIONS> extents{};

    // Current parsing position.
    std::size_t here{0};
    PQXX_ASSUME(here <= sz);
    while (here < sz)
    {
      if (data[here] == '{')
      {
        if (dim == outer)
        {
          // This must be the initial opening brace.
          if (know_extents_from != DIMENSIONS)
            throw conversion_error{
              "Array text representation closed and reopened its outside "
              "brace pair."};
          assert(here == 0);
          PQXX_ASSUME(here == 0);
        }
        else
        {
          // A sub-array can't open inside the innermost dimension.
          if (dim >= (DIMENSIONS - 1))
            throw conversion_error{
              "Array seems to have inconsistent number of dimensions."};
          // The sub-array counts as one more item in the enclosing row.
          ++extents[dim];
        }
        // (Rolls over to zero if we're coming from the outer dimension.)
        ++dim;
        // Start counting the items of this new row from scratch.
        extents[dim] = 0u;
        ++here;
      }
      else if (data[here] == '}')
      {
        if (dim == outer)
          throw conversion_error{"Array has spurious '}'."};
        if (dim < know_extents_from)
        {
          // We just finished parsing our first row in this dimension.
          // Now we know the array dimension's extent.
          m_extents[dim] = extents[dim];
          know_extents_from = dim;
        }
        else
        {
          // Every later row must have the same size as the first one.
          if (extents[dim] != m_extents[dim])
            throw conversion_error{"Rows in array have inconsistent sizes."};
        }
        // Bump back down to the next-lower dimension.  Which may be the outer
        // dimension, through underflow.
        --dim;
        ++here;
        here = parse_field_end(data, here);
      }
      else
      {
        // Found an array element.  The actual elements always live in the
        // "inner" dimension.
        if (dim != DIMENSIONS - 1)
          throw conversion_error{
            "Malformed array: found element where sub-array was expected."};
        assert(dim != outer);
        ++extents[dim];
        // Position just behind the element's text; set by every non-throwing
        // case below.
        std::size_t end;
        switch (data[here])
        {
        case '\0': throw conversion_error{"Unexpected zero byte in array."};
        // NOTE(review): this tests a literal ',' whereas parse_field_end() and
        // the unquoted-string scan below use SEPARATOR.  Confirm that this is
        // intended for element types whose separator is not a comma.
        case ',': throw conversion_error{"Array contains empty field."};
        case '"': {
          // Double-quoted string.  We parse it into a buffer before parsing
          // the resulting string as an element.  This seems wasteful: the
          // string might not contain any special characters.  So it's
          // tempting to check, and try to use a string_view and avoid a
          // useless copy step.  But.  Even besides the branch prediction
          // risk, the very fact that the back-end chose to quote the string
          // indicates that there is some kind of special character in there.
          // So in practice, this optimisation would only apply if the only
          // special characters in the string were commas.
          end = pqxx::internal::scan_double_quoted_string<ENC>(
            std::data(data), std::size(data), here);
          // TODO: scan_double_quoted_string() with reusable buffer.
          std::string const buf{
            pqxx::internal::parse_double_quoted_string<ENC>(
              std::data(data), end, here)};
          m_elts.emplace_back(from_string<ELEMENT>(buf));
        }
        break;
        default: {
          // Unquoted string.  An unquoted string is always literal, no
          // escaping or encoding, so we don't need to parse it into a
          // buffer.  We can just read it as a string_view.
          end = pqxx::internal::scan_unquoted_string<ENC, SEPARATOR, '}'>(
            std::data(data), std::size(data), here);
          std::string_view const field{
            std::string_view{std::data(data) + here, end - here}};
          // Only the unquoted text NULL denotes a null; a quoted "NULL" goes
          // through the double-quoted case above and is parsed as a string.
          if (field == "NULL")
          {
            if constexpr (nullness<ELEMENT>::has_null)
              m_elts.emplace_back(nullness<ELEMENT>::null());
            else
              throw unexpected_null{pqxx::internal::concat(
                "Array contains a null ", type_name<ELEMENT>,
                ".  Consider making it an array of std::optional<",
                type_name<ELEMENT>, "> instead.")};
          }
          else
            m_elts.emplace_back(from_string<ELEMENT>(field));
        }
        }
        here = end;
        PQXX_ASSUME(here <= sz);
        here = parse_field_end(data, here);
      }
    }

    // All braces must have been closed again by the time we run out of input.
    if (dim != outer)
      throw conversion_error{"Malformed array; may be truncated."};
    assert(know_extents_from == 0);
    PQXX_ASSUME(know_extents_from == 0);

    init_factors();
  }
431 
433  void init_factors() noexcept
434  {
435  std::size_t factor{1};
436  for (std::size_t dim{DIMENSIONS - 1}; dim > 0; --dim)
437  {
438  factor *= m_extents[dim];
439  m_factors[dim - 1] = factor;
440  }
441  }
442 
444  template<typename... INDEX> std::size_t locate(INDEX... index) const noexcept
445  {
446  static_assert(
447  sizeof...(index) == DIMENSIONS,
448  "Indexing array with wrong number of dimensions.");
449  return add_index(index...);
450  }
451 
452  template<typename OUTER, typename... INDEX>
453  constexpr std::size_t add_index(OUTER outer, INDEX... indexes) const noexcept
454  {
455  std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
456  if constexpr (sizeof...(indexes) == 0)
457  {
458  return first;
459  }
460  else
461  {
462  static_assert(sizeof...(indexes) < DIMENSIONS);
463  // (Offset by 1 here because the outer dimension is not in there.)
464  constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
465  static_assert(dimension < DIMENSIONS);
466  return first * m_factors[dimension] + add_index(indexes...);
467  }
468  }
469 
471 
473  template<typename OUTER, typename... INDEX>
474  constexpr void check_bounds(OUTER outer, INDEX... indexes) const
475  {
476  std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
477  static_assert(sizeof...(indexes) < DIMENSIONS);
478  // (Offset by 1 here because the outer dimension is not in there.)
479  constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
480  static_assert(dimension < DIMENSIONS);
481  if (first >= m_extents[dimension])
482  throw range_error{pqxx::internal::concat(
483  "Array index for dimension ", dimension, " is out of bounds: ", first,
484  " >= ", m_extents[dimension])};
485 
486  // Now check the rest of the indexes, if any.
487  if constexpr (sizeof...(indexes) > 0)
488  check_bounds(indexes...);
489  }
490 
  /// The elements, in the order in which they were parsed from the text.
  std::vector<ELEMENT> m_elts;

  /// Size along each dimension, as discovered by parse().
  std::array<std::size_t, DIMENSIONS> m_extents;

  /// Multiplication factors for indexing, one per dimension but the last.
  /** Filled in by init_factors().  Entry `d` is the product of the extents of
   * all dimensions after `d`; add_index() multiplies the index for dimension
   * `d` by it.  The innermost dimension needs no factor, so there are only
   * `DIMENSIONS - 1` entries.
   */
  std::array<std::size_t, DIMENSIONS - 1> m_factors;
506 };
507 
508 
510 
/// Low-level parser for SQL arrays.
/** Each call to get_next() reports one "juncture" found in the input text,
 * together with a string.
 */
class PQXX_LIBEXPORT array_parser
{
public:
  /// What's the latest thing found in the array?
  enum class juncture
  {
    /// Start of a row.
    row_start,
    /// End of a row.
    row_end,
    /// A null value.
    null_value,
    /// A string value.
    string_value,
    /// Parsing is done.
    done,
  };

  /// Set up a parser for the array text in `input`.
  /** The second argument is the encoding group of `input`; it defaults to
   * `MONOBYTE`.
   */
  explicit array_parser(
    std::string_view input,
    internal::encoding_group = internal::encoding_group::MONOBYTE);

  /// Parse the next step in the array.
  /** Returns the juncture that was found, paired with a string.  The call is
   * forwarded to the member function that `m_impl` points to.
   */
  std::pair<juncture, std::string> get_next() { return (this->*m_impl)(); }

private:
  /// The input text.  This is a view; it does not own the text.
  std::string_view m_input;

  /// Current position in the input.
  // NOTE(review): presumably a byte offset into m_input; confirm in array.cxx.
  std::size_t m_pos = 0u;

  /// Pointer to a member function that implements get_next().
  using implementation = std::pair<juncture, std::string> (array_parser::*)();

  /// Pick an `implementation` for the given encoding group.
  // NOTE(review): presumably selects a parse_array_step<> instantiation, and
  // is what the constructor uses to initialise m_impl; confirm in array.cxx.
  static implementation
  specialize_for_encoding(pqxx::internal::encoding_group enc);

  /// The implementation that get_next() calls.
  implementation m_impl;

  /// A single parsing step, templated on the encoding group.
  template<pqxx::internal::encoding_group>
  std::pair<juncture, std::string> parse_array_step();

  // Scanning/parsing helpers, each templated on the encoding group.
  template<pqxx::internal::encoding_group>
  std::string::size_type scan_double_quoted_string() const;
  template<pqxx::internal::encoding_group>
  std::string parse_double_quoted_string(std::string::size_type end) const;
  template<pqxx::internal::encoding_group>
  std::string::size_type scan_unquoted_string() const;
  template<pqxx::internal::encoding_group>
  std::string_view parse_unquoted_string(std::string::size_type end) const;

  template<pqxx::internal::encoding_group>
  std::string::size_type scan_glyph(std::string::size_type pos) const;
  template<pqxx::internal::encoding_group>
  std::string::size_type
  scan_glyph(std::string::size_type pos, std::string::size_type end) const;
};
606 } // namespace pqxx
607 #endif
std::string concat(TYPE...item)
Efficiently combine a bunch of items into one big string.
Definition: concat.hxx:31
constexpr std::size_t size() const noexcept
Number of elements in the array.
Definition: array.hxx:127
TO check_cast(FROM value, std::string_view description)
Cast a numeric value to another type, or throw if it underflows/overflows.
Definition: util.hxx:153
constexpr auto crend() const noexcept
Return end point of reverse iteration.
Definition: array.hxx:121
array(std::string_view data, connection const &cx)
Parse an SQL array, read as text from a pqxx::result or stream.
Definition: array.hxx:68
Low-level parser for SQL arrays.
Definition: array.hxx:530
constexpr auto back() const noexcept
Refer to the last element, if any.
Definition: array.hxx:158
constexpr auto front() const noexcept
Refer to the first element, if any.
Definition: array.hxx:153
constexpr auto crbegin() const noexcept
Begin reverse iteration.
Definition: array.hxx:119
constexpr auto cend() const noexcept
Return end point of iteration.
Definition: array.hxx:117
juncture
What's the latest thing found in the array?
Definition: array.hxx:534
The home of all libpqxx classes, functions, templates, etc.
Definition: array.cxx:26
constexpr auto cbegin() const noexcept
Begin iteration of individual elements.
Definition: array.hxx:115
constexpr std::size_t dimensions() noexcept
How many dimensions does this array have?
Definition: array.hxx:75
constexpr auto ssize() const noexcept
Number of elements in the array (as a signed number).
Definition: array.hxx:145
pqxx::internal::encoding_group enc_group(std::string_view encoding_name)
Convert libpq encoding name to its libpqxx encoding group.
Definition: encodings.cxx:35
Connection to a database.
Definition: connection.hxx:278
std::pair< juncture, std::string > get_next()
Parse the next step in the array.
Definition: array.hxx:564
std::array< std::size_t, DIMENSIONS > const & sizes() noexcept
Return the sizes of this array in each of its dimensions.
Definition: array.hxx:82
int encoding_id() const
Get the connection's encoding, as a PostgreSQL-defined code.
Definition: connection.cxx:1160
ELEMENT const & operator[](INDEX...index) const
Access element (without bounds check).
Definition: array.hxx:103
An SQL array received from the database.
Definition: array.hxx:55