libabigail
abg-libxml-utils.cc
Go to the documentation of this file.
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright (C) 2013-2025 Red Hat, Inc.
5 
6 /// @file
7 
8 #include <string>
9 #include <iostream>
10 #include <fstream>
11 #include "abg-tools-utils.h"
12 
13 #include "abg-internal.h"
14 // <headers defining libabigail's API go under here>
15 ABG_BEGIN_EXPORT_DECLARATIONS
16 
17 #include "abg-libxml-utils.h"
18 
20 // </headers defining libabigail's API>
21 
22 namespace abigail
23 {
24 
25 namespace sptr_utils
26 {
27 /// Build and return a shared_ptr for a pointer to xmlTextReader
28 template<>
29 shared_ptr<xmlTextReader>
30 build_sptr<xmlTextReader>(::xmlTextReader *p)
31 {
32  return shared_ptr<xmlTextReader>(p, abigail::xml::textReaderDeleter());
33 }
34 
35 /// Build and return a shared_ptr for a pointer to xmlChar
36 template<>
37 shared_ptr<xmlChar>
39 {
40  return shared_ptr<xmlChar>(p, abigail::xml::charDeleter());
41 }
42 
43 }//end namespace sptr_utils
44 
45 namespace xml
46 {
47 using std::istream;
48 using std::ifstream;
52 
53 // <xmlIO callbacks for xz reading support>
54 
55 
56 /// This is an xmlIO callback function used in the libxml2 I/O input
57 /// API to detect if the current handler can provider input
58 /// functionality for a file designed by a path.
59 ///
60 /// This function should return 1 iff the file contains XZ-compressed
61 /// data.
62 ///
63 /// @param filepath the path to file to consider.
64 ///
65 /// @return 1 iff the file designated by @p filepath is XZ-compressed.
66 static int
67 xz_io_match_cb(const char*filepath)
68 {
69  bool does_match = false;
70  file_type t = guess_file_type(filepath, /*look_through_compression=*/false);
72  does_match = true;
73 
74  return does_match;
75 }
76 
77 /// This is the context used by the xmlIO handler that provides input
78 /// functionality to the libxml2 I/O input API for XZ-compressed XML
79 /// files.
80 struct xz_ctxt_type
81 {
82  // The input XZ-compressed file stream.
83  std::unique_ptr<std::ifstream> input_fstream;
84  // The custom XZ-decompressor streambuf provided by tools-utils.
85  std::unique_ptr<xz_decompressor_type> decompressor_streambuf;
86  // The decompressed input stream that we can read from.
87  std::unique_ptr<std::istream> decompressed_input_stream;
88 
89  xz_ctxt_type() = delete;
90 
91  /// Constructor.
92  ///
93  /// @param is the XZ-compressed input file stream to consider.
94  xz_ctxt_type(std::ifstream* is)
95  : input_fstream(is),
96  decompressor_streambuf(new xz_decompressor_type(*is)),
97  decompressed_input_stream(new istream(decompressor_streambuf.get()))
98  {}
99 }; // end struct xz_ctxt_type.
100 
101 /// Callback used in the I/O input API of libxml2 to open a file
102 /// designated by a path and containing XZ-compressed content.
103 ///
104 /// @param filepath the path to the file to open. The file should
105 /// contain XZ-compressed data, as detected by @ref xz_io_match_cb.
106 ///
107 /// @return a pointer to an instance of @ref xz_ctxt_type if the
108 /// function could successfully open the file denoted by @p filepath.
109 /// Please note that this instance of @ref xz_ctxt_type has to be
110 /// deleted by @ref xz_io_close_cb.
111 static void*
112 xz_io_open_cb(const char* filepath)
113 {
114  std::ifstream* s = new std::ifstream(filepath, ifstream::binary);
115  if (s->bad())
116  {
117  delete s;
118  return nullptr;
119  }
120 
121  xz_ctxt_type *ctxt = new xz_ctxt_type(s);
122  return ctxt;
123 }
124 
125 /// Callback used in the I/O input API of libxml2 to read and
126 /// decompress data from an XZ-compressed file previously opened by
127 /// @ref xz_io_open_cb.
128 ///
129 /// @param context a pointer to the instance of @ref xz_ctxt_type
130 /// returned by @ref xz_io_open_cb. That context is used to read and
131 /// decompress the XZ-compressed data coming from input file.
132 ///
133 /// @param buffer the buffer where to copy the XZ-decompressed data.
134 ///
135 /// @param len the length of @p buffer.
136 ///
137 /// @return the actual number of bytes decompressed and copied into @p
138 /// buffer.
139 static int
140 xz_io_read_cb(void* context, char *buffer, int len)
141 {
142  xz_ctxt_type *ctxt = static_cast<xz_ctxt_type *>(context);
143  ctxt->decompressed_input_stream->read(buffer, len);
144  int nb_bytes_read = ctxt->decompressed_input_stream->gcount();
145  return nb_bytes_read;
146 }
147 
148 /// Callback used in the I/O input API of libxml2 to delete the
149 /// instance of @ref xz_ctxt_type created by @ref xz_io_open_cb and
150 /// free its associated resources.
151 ///
152 /// @param context the pointer to the instance of @ref xz_ctxt_type to
153 /// delete.
154 ///
155 /// @return 0 iff the operation was successful.
156 static int
157 xz_io_close_cb(void* context)
158 {
159  xz_ctxt_type *ctxt = static_cast<xz_ctxt_type*>(context);
160  ctxt->decompressed_input_stream.reset();
161  ctxt->input_fstream->close();
162  ctxt->input_fstream.reset();
163  delete ctxt;
164  return 0;
165 }
166 
167 // </xmlIO callbacks for xz reading support>
168 
169 /// The initialization function of libxml2 abstraction layer. This
170 /// function must be called prior to using any of the libxml2 capabilities.
171 void
173 {
174  LIBXML_TEST_VERSION;
175  xmlInitParser();
176  xmlRegisterInputCallbacks(xz_io_match_cb, xz_io_open_cb,
177  xz_io_read_cb, xz_io_close_cb);
178 }
179 
180 /// Instantiate an xmlTextReader that parses the content of an on-disk
181 /// file, wrap it into a smart pointer and return it.
182 ///
183 /// @param path the path to the file to be parsed by the returned
184 /// instance of xmlTextReader.
186 new_reader_from_file(const std::string& path)
187 {
188  reader_sptr p =
189  build_sptr(xmlNewTextReaderFilename (path.c_str()));
190 
191  return p;
192 }
193 
194 /// Instanciate an xmlTextReader that parses the content of an
195 /// in-memory buffer, wrap it into a smart pointer and return it.
196 ///
197 /// @param buffer the in-memory buffer to be parsed by the returned
198 /// instance of xmlTextReader.
200 new_reader_from_buffer(const std::string& buffer)
201 {
202  reader_sptr p =
203  build_sptr(xmlReaderForMemory(buffer.c_str(),
204  buffer.length(),
205  "", 0, 0));
206  return p;
207 }
208 
/// This is an xmlInputReadCallback, meant to be passed to
/// xmlNewTextReaderForIO.  It reads a number of bytes from an
/// istream.
///
/// @param context an std::istream* cast into a void*.  This is the
/// istream that the xmlTextReader is to read data from.
///
/// @param buffer the buffer where to copy the data read from the
/// input stream.
///
/// @param len the maximum number of bytes to read from the input
/// stream and to copy into @p buffer.
///
/// @return the number of bytes read (0 at end of input), or -1 in
/// case of error.  An error is either an invalid argument (null @p
/// context, null @p buffer or negative @p len) or a stream that is in
/// the "bad" state and yielded no data.
static int
xml_istream_input_read(void* context,
		       char* buffer,
		       int len)
{
  if (context == nullptr || buffer == nullptr || len < 0)
    return -1;

  // The context started its life as an std::istream*, so a
  // static_cast is enough to get it back from the void*.
  std::istream* in = static_cast<std::istream*>(context);
  in->read(buffer, len);
  const std::streamsize nb_bytes_read = in->gcount();

  // Reaching the end of input is not an error: it just yields fewer
  // bytes (possibly zero).  Only report an error when the stream is
  // broken and nothing at all could be read.
  if (nb_bytes_read == 0 && in->bad())
    return -1;

  // gcount() cannot exceed len here, so it fits in an int.
  return static_cast<int>(nb_bytes_read);
}
231 
/// This is an xmlInputCloseCallback, meant to be passed to
/// xmlNewTextReaderForIO alongside @ref xml_istream_input_read.
///
/// Such a callback is supposed to close the input stream that the
/// xmlTextReader reads from.  This implementation deliberately does
/// nothing: the stream is left untouched.
///
/// @return 0, always.
static int
xml_istream_input_close(void*)
{
  return 0;
}
241 
242 /// Instanciate an xmlTextReader that parses a content coming from an
243 /// input stream.
244 ///
245 /// @param in the input stream to consider.
246 ///
247 /// @return reader_sptr a pointer to the newly instantiated xml
248 /// reader.
250 new_reader_from_istream(std::istream* in)
251 {
252  reader_sptr p =
253  build_sptr(xmlReaderForIO(&xml_istream_input_read,
254  &xml_istream_input_close,
255  in, "", 0, 0));
256  return p;
257 }
258 
259 /// Convert a shared pointer to xmlChar into an std::string.
260 ///
261 /// If the xmlChar is NULL, set "" to the string.
262 ///
263 /// @param ssptr the shared point to xmlChar to convert.
264 ///
265 /// @param s the output string.
266 ///
267 /// @return true if the shared pointer to xmlChar contained a non NULL
268 /// string, false otherwise.
269 bool
270 xml_char_sptr_to_string(xml_char_sptr& ssptr, std::string& s)
271 {
272  bool non_nil = false;
273  if (CHAR_STR(ssptr))
274  {
275  s = CHAR_STR(ssptr);
276  non_nil = true;
277  }
278  else
279  {
280  s = "";
281  non_nil = false;
282  }
283 
284  return non_nil;
285 }
286 
287 /// Return the depth of an xml element node.
288 ///
289 /// Note that the node must be attached to an XML document.
290 ///
291 /// @param n the xml to consider.
292 ///
293 /// @return a positive or zero number for an XML node properly
294 /// attached to an xml document, -1 otherwise. Note that the function
295 /// returns -1 if passed an xml document as well.
296 int
297 get_xml_node_depth(xmlNodePtr n)
298 {
299  if (n->type == XML_DOCUMENT_NODE || n->parent == NULL)
300  return -1;
301 
302  if (n->parent->type == XML_DOCUMENT_NODE)
303  return 0;
304 
305  return 1 + get_xml_node_depth(n->parent);
306 }
307 
/// Replace each of the 5 characters that have a predefined XML
/// entity by that entity.
///
/// The mapping is:
///
///   '<' becomes &lt;, '>' becomes &gt;, '&' becomes &amp;, the
///   apostrophe becomes &apos; and '"' becomes &quot;.
///
/// Every other character is copied verbatim.
///
/// @param str the input string to scan for characters to escape.
///
/// @param escaped the output string.  The escaped form of @p str is
/// appended to it; its previous content is kept.
void
escape_xml_string(const std::string& str,
		  std::string& escaped)
{
  for (const char c : str)
    {
      if (c == '<')
	escaped.append("&lt;");
      else if (c == '>')
	escaped.append("&gt;");
      else if (c == '&')
	escaped.append("&amp;");
      else if (c == '\'')
	escaped.append("&apos;");
      else if (c == '"')
	escaped.append("&quot;");
      else
	escaped.push_back(c);
    }
}
348 
349 /// Escape the 5 characters representing the predefined XML entities.
350 ///
351 /// The resulting entities and their matching characters are:
352 ///
353 /// &lt; for the character '<', &gt; for the character '>', &apos; for
354 /// the character ''', &quot; for the character '"', and &amp; for the
355 /// character '&'.
356 ///
357 //// @param str the input string to read to search for the characters
358 //// to escape.
359 ////
360 //// @return the resulting string that contains the pre-defined
361 //// characters escaped as predefined entitites.
362 std::string
363 escape_xml_string(const std::string& str)
364 {
365  std::string result;
366  escape_xml_string(str, result);
367  return result;
368 }
369 
/// Replace every '-' character by the '&#45;' entity, so that the
/// result never contains '--' and can be emitted inside an XML
/// comment.
///
/// Every other character is copied verbatim.
///
/// @param str the input string to scan for '-' characters.
///
/// @param escaped the output string.  The escaped form of @p str is
/// appended to it; its previous content is kept.
void
escape_xml_comment(const std::string& str,
		   std::string& escaped)
{
  for (const char c : str)
    {
      if (c == '-')
	escaped.append("&#45;");
      else
	escaped.push_back(c);
    }
}
394 
395 /// Escape the '-' character, to avoid having a '--' in a comment.
396 ///
397 /// The resulting entity for '-' is '&#45;'.
398 ///
399 //// @param str the input string to read to search for the characters
400 //// to escape.
401 ////
402 //// @return the resulting string that contains the pre-defined
403 //// characters escaped as predefined entitites.
404 std::string
405 escape_xml_comment(const std::string& str)
406 {
407  std::string result;
408  escape_xml_comment(str, result);
409  return result;
410 }
411 
/// Scan a string for the 5 predefined XML entities and replace each
/// occurrence by the character it stands for.
///
/// The recognized entities are:
///
///   &lt; for '<', &gt; for '>', &amp; for '&', &apos; for the
///   apostrophe and &quot; for '"'.
///
/// Anything else, including a '&' that does not start one of these
/// entities, is copied verbatim.
///
/// @param str the input XML string to consider.
///
/// @param escaped the output string.  Despite its name, it receives
/// the un-escaped form of @p str, which is appended to it; its
/// previous content is kept.
void
unescape_xml_string(const std::string& str,
		    std::string& escaped)
{
  // One entry per predefined entity: its spelling, the length of that
  // spelling, and the character it stands for.
  struct entity_mapping
  {
    const char*			text;
    std::string::size_type	length;
    char			character;
  };
  static const entity_mapping entities[] =
    {
      {"&lt;", 4, '<'},
      {"&gt;", 4, '>'},
      {"&amp;", 5, '&'},
      {"&apos;", 6, '\''},
      {"&quot;", 6, '"'},
    };

  for (std::string::size_type pos = 0; pos < str.size();)
    {
      const entity_mapping* found = nullptr;
      if (str[pos] == '&')
	for (const entity_mapping& candidate : entities)
	  if (str.compare(pos, candidate.length, candidate.text) == 0)
	    {
	      found = &candidate;
	      break;
	    }

      if (found)
	{
	  escaped += found->character;
	  pos += found->length;
	}
      else
	{
	  escaped += str[pos];
	  ++pos;
	}
    }
}
485 
486 /// Read a string, detect the 5 predefined XML entities it may contain
487 /// and un-escape them, by writting their corresponding characters
488 /// back in. The pre-defined entities are:
489 ///
490 /// &lt; for the character '<', &gt; for the character '>', &apos; for
491 /// the character ''', &quot; for the character '"', and &amp; for the
492 /// character '&'.
493 ///
494 /// @param str the input XML string to consider.
495 ///
496 /// @return escaped where to write the resulting un-escaped string.
497 std::string
498 unescape_xml_string(const std::string& str)
499 {
500  std::string result;
501  unescape_xml_string(str, result);
502  return result;
503 }
504 
/// Scan a string for the '&#45;' entity and replace each occurrence
/// by the '-' character.
///
/// Everything else is copied verbatim.
///
/// @param str the input XML string to consider.
///
/// @param escaped the output string.  Despite its name, it receives
/// the un-escaped form of @p str, which is appended to it; its
/// previous content is kept.
void
unescape_xml_comment(const std::string& str,
		     std::string& escaped)
{
  static const char dash_entity[] = "&#45;";
  // Do not count the terminating NUL of the literal.
  const std::string::size_type dash_entity_length = sizeof(dash_entity) - 1;

  std::string::size_type pos = 0;
  while (pos < str.size())
    {
      if (str.compare(pos, dash_entity_length, dash_entity) == 0)
	{
	  escaped += '-';
	  pos += dash_entity_length;
	}
      else
	{
	  escaped += str[pos];
	  ++pos;
	}
    }
}
534 
535 /// Read a string, detect the '#&45;' entity and un-escape it into
536 /// the '-' character.
537 ///
538 /// @param str the input XML string to consider.
539 ///
540 /// @return escaped where to write the resulting un-escaped string.
541 std::string
542 unescape_xml_comment(const std::string& str)
543 {
544  std::string result;
545  unescape_xml_comment(str, result);
546  return result;
547 }
548 
549 }//end namespace xml
550 }//end namespace abigail
shared_ptr< T > build_sptr(T *p)
This is to be specialized for the diverse C types that need wrapping in shared_ptr.
reader_sptr new_reader_from_istream(std::istream *in)
Instantiate an xmlTextReader that parses content coming from an input stream.
void unescape_xml_comment(const std::string &str, std::string &escaped)
Read a string, detect the '&#45;' entity and un-escape it into the '-' character. ...
shared_ptr< xmlChar > xml_char_sptr
A convenience typedef for a shared pointer of xmlChar.
bool xml_char_sptr_to_string(xml_char_sptr &ssptr, std::string &s)
Convert a shared pointer to xmlChar into an std::string.
reader_sptr new_reader_from_buffer(const std::string &buffer)
Instanciate an xmlTextReader that parses the content of an in-memory buffer, wrap it into a smart poi...
shared_ptr< xmlTextReader > build_sptr< xmlTextReader >(::xmlTextReader *p)
Build and return a shared_ptr for a pointer to xmlTextReader.
shared_ptr< xmlChar > build_sptr< xmlChar >(xmlChar *p)
Build and return a shared_ptr for a pointer to xmlChar.
shared_ptr< xmlTextReader > reader_sptr
A convenience typedef for a shared pointer of xmlTextReader.
This functor is used to instantiate a shared_ptr for the xmlTextReader.
Toplevel namespace for libabigail.
file_type guess_file_type(istream &in)
Guess the type of the content of an input stream.
void escape_xml_comment(const std::string &str, std::string &escaped)
Escape the '-' character, to avoid having a '--' in a comment.
file_type
The different types of files understood by the bi* suite of tools.
void unescape_xml_string(const std::string &str, std::string &escaped)
Read a string, detect the 5 predefined XML entities it may contain and un-escape them, by writing their corresponding characters back in. The pre-defined entities are:
void initialize()
The initialization function of libxml2 abstraction layer. This function must be called prior to using...
void escape_xml_string(const std::string &str, std::string &escaped)
Escape the 5 characters representing the predefined XML entities.
int get_xml_node_depth(xmlNodePtr n)
Return the depth of an xml element node.
The XZ (lzma) compression scheme.
reader_sptr new_reader_from_file(const std::string &path)
Instantiate an xmlTextReader that parses the content of an on-disk file, wrap it into a smart pointer...
This functor is used to instantiate a shared_ptr for xmlChar.