libabigail
abg-symtab-reader.cc
Go to the documentation of this file.
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2013-2025 Red Hat, Inc.
5 // Copyright (C) 2020-2025 Google, Inc.
6 //
7 // Author: Matthias Maennich
8 
9 /// @file
10 ///
11 /// This contains the definition of the symtab reader
12 
13 #include <algorithm>
14 #include <iostream>
15 #include <unordered_map>
16 #include <unordered_set>
17 
18 #include "abg-elf-helpers.h"
19 #include "abg-fwd.h"
20 #include "abg-internal.h"
21 #include "abg-tools-utils.h"
22 
23 // Though this is an internal header, we need to export the symbols to be able
24 // to test this code. TODO: find a way to export symbols just for unit tests.
25 ABG_BEGIN_EXPORT_DECLARATIONS
26 #include "abg-symtab-reader.h"
28 
29 namespace abigail
30 {
31 
32 namespace symtab_reader
33 {
34 
35 /// symtab_filter implementations
36 
37 /// Determine whether a symbol is matching the filter criteria of this filter
38 /// object. In terms of a filter functionality, you would _not_ filter out
39 /// this symbol if it passes this (i.e. returns true).
40 ///
41 /// @param symbol The Elf symbol under test.
42 ///
43 /// @return whether the symbol matches all relevant / required criteria
44 bool
45 symtab_filter::matches(const elf_symbol& symbol) const
46 {
47  if (functions_ && *functions_ != symbol.is_function())
48  return false;
49  if (variables_ && *variables_ != symbol.is_variable())
50  return false;
51  if (public_symbols_ && *public_symbols_ != symbol.is_public())
52  return false;
53  if (undefined_symbols_ && *undefined_symbols_ == symbol.is_defined())
54  return false;
55  if (kernel_symbols_ && *kernel_symbols_ != symbol.is_in_ksymtab())
56  return false;
57 
58  return true;
59 }
60 
61 /// symtab implementations
62 
63 /// Obtain a suitable default filter for iterating this symtab object.
64 ///
65 /// The symtab_filter obtained is populated with some sensible default
66 /// settings, such as public_symbols(true) and kernel_symbols(true) if the
67 /// binary has been identified as Linux Kernel binary.
68 ///
69 /// @return a symtab_filter with sensible populated defaults
72 {
73  symtab_filter filter;
74  filter.set_public_symbols();
75  if (is_kernel_binary_)
76  filter.set_kernel_symbols();
77  return filter;
78 }
79 
80 /// Get a vector of symbols that are associated with a certain name
81 ///
82 /// @param name the name the symbols need to match
83 ///
84 /// @return a vector of symbols, empty if no matching symbols have been found
85 const elf_symbols&
86 symtab::lookup_symbol(const std::string& name) const
87 {
88  static const elf_symbols empty_result;
89  const auto it = name_symbol_map_.find(name);
90  if (it != name_symbol_map_.end())
91  return it->second;
92  return empty_result;
93 }
94 
95 /// Lookup a symbol by its address
96 ///
97 /// @param symbol_addr the starting address of the symbol
98 ///
99 /// @return a symbol if found, else an empty sptr
100 const elf_symbol_sptr&
101 symtab::lookup_symbol(GElf_Addr symbol_addr) const
102 {
103  static const elf_symbol_sptr empty_result;
104  const auto addr_it = addr_symbol_map_.find(symbol_addr);
105  if (addr_it != addr_symbol_map_.end())
106  return addr_it->second;
107  else
108  {
109  // check for a potential entry address mapping instead,
110  // relevant for ppc ELFv1 binaries
111  const auto entry_it = entry_addr_symbol_map_.find(symbol_addr);
112  if (entry_it != entry_addr_symbol_map_.end())
113  return entry_it->second;
114  }
115  return empty_result;
116 }
117 
118 /// Lookup an undefined function symbol with a given name.
119 ///
120 /// @param sym_name the name of the function symbol to lookup.
121 ///
122 /// @return the undefined function symbol found or nil if none was
123 /// found.
124 const elf_symbol_sptr
125 symtab::lookup_undefined_function_symbol(const std::string& sym_name)
126 {
127  auto it = undefined_fn_symbols_.find(sym_name);
128  if (it == undefined_fn_symbols_.end())
129  return elf_symbol_sptr();
130  return it->second;
131 }
132 
133 /// Lookup an undefined variable symbol with a given name.
134 ///
135 /// @param sym_name the name of the variable symbol to lookup.
136 ///
137 /// @return the undefined variable symbol found or nil if none was
138 /// found.
139 const elf_symbol_sptr
140 symtab::lookup_undefined_variable_symbol(const std::string& sym_name)
141 {
142  symtab_filter f = make_filter();
143  f.set_functions(false);
144  f.set_public_symbols(false);
145  f.set_undefined_symbols(true);
146  f.set_variables(true);
147 
148  elf_symbol_sptr result;
149  for (auto sym : filtered_symtab(*this, f))
150  if (sym_name == sym->get_name())
151  {
152  result = sym;
153  break;
154  }
155  return result;
156 }
157 
158 /// Test if a given function symbol has been exported.
159 ///
160 /// Note that this doesn't test if the symbol is defined or not, but
161 /// assumes the symbol is defined.
162 ///
163 /// @param name the name of the symbol we are looking for.
164 ///
165 /// @return the elf symbol if found, or nil otherwise.
167 symtab::function_symbol_is_exported(const string& name)
168 {
169  const elf_symbols& syms = lookup_symbol(name);
170  for (auto s : syms)
171  if (s->is_function() && s->is_public())
172  return s;
173 
174  return elf_symbol_sptr();
175 }
176 
177 /// Test if a given function symbol has been exported.
178 ///
179 /// Note that this doesn't test if the symbol is defined or not, but
180 /// assumes the symbol is defined.
181 ///
182 /// @param symbol_address the address of the symbol we are looking
183 /// for. Note that this address must be a relative offset from the
184 /// beginning of the .text section, just like the kind of addresses
185 /// that are present in the .symtab section.
186 ///
187 /// @return the elf symbol if found, or nil otherwise.
189 symtab::function_symbol_is_exported(const GElf_Addr symbol_address)
190 {
191  elf_symbol_sptr symbol = lookup_symbol(symbol_address);
192  if (!symbol)
193  return symbol;
194 
195  if (!symbol->is_function() || !symbol->is_public())
196  return elf_symbol_sptr();
197 
198  return symbol;
199 }
200 
201 /// Test if a given variable symbol has been exported.
202 ///
203 /// Note that this assumes the symbol is exported but doesn't test for
204 /// it.
205 ///
206 /// @param name the name of the symbol we are looking
207 /// for.
208 ///
209 /// @return the elf symbol if found, or nil otherwise.
211 symtab::variable_symbol_is_exported(const string& name)
212 {
213  const elf_symbols& syms = lookup_symbol(name);
214  for (auto s : syms)
215  if (s->is_variable() && s->is_public())
216  return s;
217 
218  return elf_symbol_sptr();
219 }
220 
221 /// Test if a given variable symbol has been exported.
222 ///
223 /// Note that this assumes the symbol is exported but doesn't test for
224 /// it.
225 ///
226 /// @param symbol_address the address of the symbol we are looking
227 /// for. Note that this address must be a relative offset from the
228 /// beginning of the .text section, just like the kind of addresses
229 /// that are present in the .symtab section.
230 ///
231 /// @return the elf symbol if found, or nil otherwise.
233 symtab::variable_symbol_is_exported(const GElf_Addr symbol_address)
234 {
235  elf_symbol_sptr symbol = lookup_symbol(symbol_address);
236  if (!symbol)
237  return symbol;
238 
239  if (!symbol->is_variable() || !symbol->is_public())
240  return elf_symbol_sptr();
241 
242  return symbol;
243 }
244 
245 /// Test if a name is a the name of an undefined function symbol.
246 ///
247 /// @param sym_name the symbol name to consider.
248 ///
249 /// @return the undefined symbol if found, nil otherwise.
251 symtab::function_symbol_is_undefined(const string& sym_name)
252 {
253  collect_undefined_fns_and_vars_linkage_names();
254  if (undefined_function_linkage_names_.count(sym_name))
255  {
256  elf_symbol_sptr sym = lookup_undefined_function_symbol(sym_name);
257  ABG_ASSERT(sym);
258  ABG_ASSERT(sym->is_function());
259  ABG_ASSERT(!sym->is_defined());
260  return sym;
261  }
262  return elf_symbol_sptr();
263 }
264 
265 /// Test if a name is a the name of an undefined variable symbol.
266 ///
267 /// @param sym_name the symbol name to consider.
268 ///
269 // @return the undefined symbol if found, nil otherwise.
271 symtab::variable_symbol_is_undefined(const string& sym_name)
272 {
273  collect_undefined_fns_and_vars_linkage_names();
274  if (undefined_variable_linkage_names_.count(sym_name))
275  {
276  elf_symbol_sptr sym = lookup_undefined_variable_symbol(sym_name);
277  ABG_ASSERT(sym);
278  ABG_ASSERT(sym->is_variable());
279  ABG_ASSERT(!sym->is_defined());
280  return sym;
281  }
282  return elf_symbol_sptr();
283 }
284 
285 /// A symbol sorting functor.
286 static struct
287 {
288  bool
289  operator()(const elf_symbol_sptr& left, const elf_symbol_sptr& right)
290  {return left->get_id_string() < right->get_id_string();}
291 } symbol_sort;
292 
293 /// Construct a symtab object and instantiate it from an ELF
294 /// handle. Also pass in the ir::environment we are living in. If
295 /// specified, the symbol_predicate will be respected when creating
296 /// the full vector of symbols.
297 ///
298 /// @param elf_handle the elf handle to load the symbol table from
299 ///
300 /// @param env the environment we are operating in
301 ///
302 /// @param is_suppressed a predicate function to determine if a symbol should
303 /// be suppressed
304 ///
305 /// @return a smart pointer handle to symtab, set to nullptr if the load was
306 /// not completed
307 symtab_ptr
308 symtab::load(Elf* elf_handle,
309  const ir::environment& env,
310  symbol_predicate is_suppressed)
311 {
312  ABG_ASSERT(elf_handle);
313 
314  symtab_ptr result(new symtab);
315  if (!result->load_(elf_handle, env, is_suppressed))
316  return {};
317 
318  return result;
319 }
320 
321 /// Construct a symtab object from existing name->symbol lookup maps.
322 /// They were possibly read from a different representation (XML maybe).
323 ///
324 /// @param function_symbol_map a map from ELF function name to elf_symbol
325 ///
326 /// @param variable_symbol_map a map from ELF variable name to elf_symbol
327 ///
328 /// @return a smart pointer handle to symtab, set to nullptr if the load was
329 /// not completed
330 symtab_ptr
331 symtab::load(string_elf_symbols_map_sptr function_symbol_map,
332  string_elf_symbols_map_sptr variables_symbol_map)
333 {
334  symtab_ptr result(new symtab);
335  if (!result->load_(function_symbol_map, variables_symbol_map))
336  return {};
337 
338  return result;
339 }
340 
341 /// Default constructor of the @ref symtab type.
342 symtab::symtab()
343  : is_kernel_binary_(false), has_ksymtab_entries_(false),
344  cached_undefined_symbol_names_(false)
345 {}
346 
347 /// Load the symtab representation from an Elf binary presented to us by an
348 /// Elf* handle.
349 ///
350 /// This method iterates over the entries of .symtab and collects all
351 /// interesting symbols (functions and variables).
352 ///
353 /// In case of a Linux Kernel binary, it also collects information about the
354 /// symbols exported via EXPORT_SYMBOL in the Kernel that would then end up
355 /// having a corresponding __ksymtab entry.
356 ///
357 /// Symbols that are suppressed will be omitted from the symbols_ vector, but
358 /// still be discoverable through the name->symbol and addr->symbol lookup
359 /// maps.
360 ///
361 /// @param elf_handle the elf handle to load the symbol table from
362 ///
363 /// @param env the environment we are operating in
364 ///
365 /// @param is_suppressed a predicate function to determine if a symbol should
366 /// be suppressed
367 ///
368 /// @return true if the load succeeded
369 bool
370 symtab::load_(Elf* elf_handle,
371  const ir::environment& env,
372  symbol_predicate is_suppressed)
373 {
374  GElf_Ehdr ehdr_mem;
375  GElf_Ehdr* header = gelf_getehdr(elf_handle, &ehdr_mem);
376  if (!header)
377  {
378  std::cerr << "Could not get ELF header: Skipping symtab load.\n";
379  return false;
380  }
381 
382  Elf_Scn* symtab_section = elf_helpers::find_symbol_table_section(elf_handle);
383  if (!symtab_section)
384  {
385  std::cerr << "No symbol table found: Skipping symtab load.\n";
386  return false;
387  }
388 
389  GElf_Shdr symtab_sheader;
390  gelf_getshdr(symtab_section, &symtab_sheader);
391 
392  // check for bogus section header
393  if (symtab_sheader.sh_entsize == 0)
394  {
395  std::cerr << "Invalid symtab header found: Skipping symtab load.\n";
396  return false;
397  }
398 
399  const size_t number_syms =
400  symtab_sheader.sh_size / symtab_sheader.sh_entsize;
401 
402  Elf_Data* symtab = elf_getdata(symtab_section, 0);
403  if (!symtab)
404  {
405  std::cerr << "Could not load elf symtab: Skipping symtab load.\n";
406  return false;
407  }
408 
409  // The __kstrtab_strings sections is basically an ELF strtab but does not
410  // support elf_strptr lookups. A single call to elf_getdata gives a handle to
411  // washed section data.
412  //
413  // The value of a __kstrtabns_FOO (or other similar) symbol is an address
414  // within the __kstrtab_strings section. To look up the string value, we need
415  // to translate from vmlinux load address to section offset by subtracting the
416  // base address of the section. This adjustment is not needed for loadable
417  // modules which are relocatable and so identifiable by ELF type ET_REL.
418  Elf_Scn* strings_section = elf_helpers::find_ksymtab_strings_section(elf_handle);
419  size_t strings_offset = 0;
420  const char* strings_data = nullptr;
421  size_t strings_size = 0;
422  if (strings_section)
423  {
424  GElf_Shdr strings_sheader;
425  gelf_getshdr(strings_section, &strings_sheader);
426  strings_offset = header->e_type == ET_REL ? 0 : strings_sheader.sh_addr;
427  Elf_Data* data = elf_getdata(strings_section, nullptr);
428  ABG_ASSERT(data->d_off == 0);
429  strings_data = reinterpret_cast<const char *>(data->d_buf);
430  strings_size = data->d_size;
431  }
432 
433  const bool is_kernel = elf_helpers::is_linux_kernel(elf_handle);
434  std::unordered_set<std::string> exported_kernel_symbols;
435  std::unordered_map<std::string, uint32_t> crc_values;
436  std::unordered_map<std::string, std::string> namespaces;
437 
438  for (size_t i = 0; i < number_syms; ++i)
439  {
440  GElf_Sym *sym, sym_mem;
441  sym = gelf_getsym(symtab, i, &sym_mem);
442  if (!sym)
443  {
444  std::cerr << "Could not load symbol with index " << i
445  << ": Skipping symtab load.\n";
446  return false;
447  }
448 
449  const char* const name_str =
450  elf_strptr(elf_handle, symtab_sheader.sh_link, sym->st_name);
451 
452  // no name, no game
453  if (!name_str)
454  continue;
455 
456  const std::string name = name_str;
457  if (name.empty())
458  continue;
459 
460  // Handle ksymtab entries. Every symbol entry that starts with __ksymtab_
461  // indicates that the symbol in question is exported through ksymtab. We
462  // do not know whether this is ksymtab_gpl or ksymtab, but that is good
463  // enough for now.
464  //
465  // We could follow up with this entry:
466  //
467  // symbol_value -> ksymtab_entry in either ksymtab_gpl or ksymtab
468  // -> addr/name/namespace (in case of PREL32: offset)
469  //
470  // That way we could also detect ksymtab<>ksymtab_gpl changes or changes
471  // of the symbol namespace.
472  //
473  // As of now this lookup is fragile, as occasionally ksymtabs are empty
474  // (seen so far for kernel modules and LTO builds). Hence we stick to the
475  // fairly safe assumption that ksymtab exported entries are having an
476  // appearence as __ksymtab_<symbol> in the symtab.
477  if (is_kernel && name.rfind("__ksymtab_", 0) == 0)
478  {
479  ABG_ASSERT(exported_kernel_symbols.insert(name.substr(10)).second);
480  continue;
481  }
482  if (is_kernel && name.rfind("__crc_", 0) == 0)
483  {
484  uint32_t crc_value;
485  ABG_ASSERT(elf_helpers::get_crc_for_symbol(elf_handle,
486  sym, crc_value));
487  ABG_ASSERT(crc_values.emplace(name.substr(6), crc_value).second);
488  continue;
489  }
490  if (strings_section && is_kernel && name.rfind("__kstrtabns_", 0) == 0)
491  {
492  // This symbol lives in the __ksymtab_strings section but st_value may
493  // be a vmlinux load address so we need to subtract the offset before
494  // looking it up in that section.
495  const size_t value = sym->st_value;
496  const size_t offset = value - strings_offset;
497  // check offset
498  ABG_ASSERT(offset < strings_size);
499  // find the terminating NULL
500  const char* first = strings_data + offset;
501  const char* last = strings_data + strings_size;
502  const char* limit = std::find(first, last, 0);
503  // check NULL found
504  ABG_ASSERT(limit < last);
505  // interpret the empty namespace name as no namespace name
506  if (first < limit)
507  ABG_ASSERT(namespaces.emplace(
508  name.substr(12), std::string(first, limit - first)).second);
509  continue;
510  }
511 
512  // filter out uninteresting entries and only keep functions/variables for
513  // now. The rest might be interesting in the future though.
514  const int sym_type = GELF_ST_TYPE(sym->st_info);
515  if (!(sym_type == STT_FUNC
516  || sym_type == STT_GNU_IFUNC
517  // If the symbol is for an OBJECT, the index of the
518  // section it refers to cannot be absolute.
519  // Otherwise that OBJECT is not a variable.
520  || (sym_type == STT_OBJECT && sym->st_shndx != SHN_ABS)
521  // Undefined global variable symbols have symbol type
522  // STT_NOTYPE. No idea why.
523  || (sym_type == STT_NOTYPE && sym->st_shndx == SHN_UNDEF)
524  || sym_type == STT_TLS))
525  continue;
526 
527  const bool sym_is_defined = sym->st_shndx != SHN_UNDEF;
528  // this occurs in relocatable files.
529  const bool sym_is_common = sym->st_shndx == SHN_COMMON;
530 
531  elf_symbol::version ver;
532  elf_helpers::get_version_for_symbol(elf_handle, i, sym_is_defined, ver);
533 
534  const elf_symbol_sptr& symbol_sptr =
536  (env, i, sym->st_size, name,
537  elf_helpers::stt_to_elf_symbol_type(GELF_ST_TYPE(sym->st_info)),
538  elf_helpers::stb_to_elf_symbol_binding(GELF_ST_BIND(sym->st_info)),
539  sym_is_defined, sym_is_common, ver,
540  elf_helpers::stv_to_elf_symbol_visibility
541  (GELF_ST_VISIBILITY(sym->st_other)));
542 
543  // add to the name->symbol lookup
544  name_symbol_map_[name].push_back(symbol_sptr);
545 
546  // add to the addr->symbol lookup
547  if (symbol_sptr->is_common_symbol())
548  {
549  const auto it = name_symbol_map_.find(name);
550  ABG_ASSERT(it != name_symbol_map_.end());
551  const elf_symbols& common_sym_instances = it->second;
552  ABG_ASSERT(!common_sym_instances.empty());
553  if (common_sym_instances.size() > 1)
554  {
555  elf_symbol_sptr main_common_sym = common_sym_instances[0];
556  ABG_ASSERT(main_common_sym->get_name() == name);
557  ABG_ASSERT(main_common_sym->is_common_symbol());
558  ABG_ASSERT(symbol_sptr.get() != main_common_sym.get());
559  main_common_sym->add_common_instance(symbol_sptr);
560  }
561  }
562  else if (symbol_sptr->is_defined())
563  setup_symbol_lookup_tables(elf_handle, sym, symbol_sptr);
564  }
565 
566  // Now that symbols aliases have been constructed, let's determine
567  // what symbol has been suppressed or not. Suppression takes into
568  // account
569  for (auto& elem : name_symbol_map_)
570  {
571  auto& symbols = elem.second;
572  for (auto& symbol : symbols)
573  {
574  // We do not take suppressed symbols into our symbol vector
575  // to avoid accidental leakage. But we ensure supressed
576  // symbols are otherwise set up for lookup.
577  if (!(is_suppressed && is_suppressed(symbol)))
578  {
579  // add to the symbol vector
580  symbols_.push_back(symbol);
581  if (!symbol->is_defined())
582  {
583  if (symbol->is_function())
584  undefined_fn_symbols_[symbol->get_name()] = symbol;
585  else if (symbol->is_variable())
586  undefined_var_symbols_[symbol->get_name()] = symbol;
587  }
588  }
589  else
590  symbol->set_is_suppressed(true);
591  }
592  }
593 
594  add_alternative_address_lookups(elf_handle);
595 
596  is_kernel_binary_ = elf_helpers::is_linux_kernel(elf_handle);
597 
598  // Now apply the ksymtab_exported attribute to the symbols we collected.
599  for (const auto& symbol : exported_kernel_symbols)
600  {
601  const auto r = name_symbol_map_.find(symbol);
602  if (r == name_symbol_map_.end())
603  continue;
604 
605  for (const auto& elf_symbol : r->second)
606  if (elf_symbol->is_public())
607  elf_symbol->set_is_in_ksymtab(true);
608  has_ksymtab_entries_ = true;
609  }
610 
611  // Now add the CRC values
612  for (const auto& crc_entry : crc_values)
613  {
614  const auto r = name_symbol_map_.find(crc_entry.first);
615  if (r == name_symbol_map_.end())
616  continue;
617 
618  for (const auto& symbol : r->second)
619  symbol->set_crc(crc_entry.second);
620  }
621 
622  // Now add the namespaces
623  for (const auto& namespace_entry : namespaces)
624  {
625  const auto r = name_symbol_map_.find(namespace_entry.first);
626  if (r == name_symbol_map_.end())
627  continue;
628 
629  for (const auto& symbol : r->second)
630  symbol->set_namespace(namespace_entry.second);
631  }
632 
633  // sort the symbols for deterministic output
634  std::sort(symbols_.begin(), symbols_.end(), symbol_sort);
635 
636  return true;
637 }
638 
639 /// Load the symtab representation from a function/variable lookup map pair.
640 ///
641 /// This method assumes the lookup maps are correct and sets up the data
642 /// vector as well as the name->symbol lookup map. The addr->symbol lookup
643 /// map cannot be set up in this case.
644 ///
645 /// @param function_symbol_map a map from ELF function name to elf_symbol
646 ///
647 /// @param variable_symbol_map a map from ELF variable name to elf_symbol
648 ///
649 /// @return true if the load succeeded
650 bool
651 symtab::load_(string_elf_symbols_map_sptr function_symbol_map,
652  string_elf_symbols_map_sptr variables_symbol_map)
653 
654 {
655  if (function_symbol_map)
656  for (const auto& symbol_map_entry : *function_symbol_map)
657  {
658  for (const auto& symbol : symbol_map_entry.second)
659  {
660  if (!symbol->is_suppressed())
661  {
662  symbols_.push_back(symbol);
663  if (!symbol->is_defined())
664  undefined_fn_symbols_[symbol->get_name()] = symbol;
665  }
666  }
667  ABG_ASSERT(name_symbol_map_.insert(symbol_map_entry).second);
668  }
669 
670  if (variables_symbol_map)
671  for (const auto& symbol_map_entry : *variables_symbol_map)
672  {
673  for (const auto& symbol : symbol_map_entry.second)
674  {
675  if (!symbol->is_suppressed())
676  {
677  symbols_.push_back(symbol);
678  if (!symbol->is_defined())
679  undefined_var_symbols_[symbol->get_name()] = symbol;
680  }
681  }
682  ABG_ASSERT(name_symbol_map_.insert(symbol_map_entry).second);
683  }
684 
685  // sort the symbols for deterministic output
686  std::sort(symbols_.begin(), symbols_.end(), symbol_sort);
687 
688  return true;
689 }
690 
691 /// Notify the symtab about the name of the main symbol at a given address.
692 ///
693 /// From just alone the symtab we can't guess the main symbol of a bunch of
694 /// aliased symbols that all point to the same address. During processing of
695 /// additional information (such as DWARF), this information becomes apparent
696 /// and we can adjust the addr->symbol lookup map as well as the alias
697 /// reference of the symbol objects.
698 ///
699 /// @param addr the addr that we are updating the main symbol for
700 /// @param name the name of the main symbol
701 void
702 symtab::update_main_symbol(GElf_Addr addr, const std::string& name)
703 {
704  // get one symbol (i.e. the current main symbol)
705  elf_symbol_sptr symbol = lookup_symbol(addr);
706 
707  // The caller might not know whether the addr is associated to an ELF symbol
708  // that we care about. E.g. the addr could be associated to an ELF symbol,
709  // but not one in .dynsym when looking at a DSO. Hence, early exit if the
710  // lookup failed.
711  if (!symbol)
712  return;
713 
714  // determine the new main symbol by attempting an update
715  elf_symbol_sptr new_main = symbol->update_main_symbol(name);
716 
717  // also update the default symbol we return when looked up by address
718  if (new_main)
719  addr_symbol_map_[addr] = new_main;
720 }
721 
722 /// Various adjustments and bookkeeping may be needed to provide a correct
723 /// interpretation (one that matches DWARF addresses) of raw symbol values.
724 ///
725 /// This is a sub-routine for symtab::load_ and
726 /// symtab::add_alternative_address_lookups and must be called only
727 /// once (per symbol) during the execution of the former.
728 ///
729 /// @param elf_handle the ELF handle
730 ///
731 /// @param elf_symbol the ELF symbol
732 ///
733 /// @param symbol_sptr the libabigail symbol
734 ///
735 /// @return a possibly-adjusted symbol value
736 GElf_Addr
737 symtab::setup_symbol_lookup_tables(Elf* elf_handle,
738  GElf_Sym* elf_symbol,
739  const elf_symbol_sptr& symbol_sptr)
740 {
741  const bool is_arm32 = elf_helpers::architecture_is_arm32(elf_handle);
742  const bool is_arm64 = elf_helpers::architecture_is_arm64(elf_handle);
743  const bool is_ppc64 = elf_helpers::architecture_is_ppc64(elf_handle);
744  const bool is_ppc32 = elf_helpers::architecture_is_ppc32(elf_handle);
745 
746  GElf_Addr symbol_value =
747  elf_helpers::maybe_adjust_et_rel_sym_addr_to_abs_addr(elf_handle,
748  elf_symbol);
749 
750  if (is_arm32 && symbol_sptr->is_function())
751  // Clear bit zero of ARM32 addresses as per "ELF for the Arm
752  // Architecture" section 5.5.3.
753  // https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
754  symbol_value &= ~1;
755 
756  if (is_arm64)
757  // Copy bit 55 over bits 56 to 63 which may be tag information.
758  symbol_value = symbol_value & (1ULL<<55)
759  ? symbol_value | (0xffULL<<56)
760  : symbol_value &~ (0xffULL<<56);
761 
762  if (symbol_sptr->is_defined())
763  {
764  const auto result =
765  addr_symbol_map_.emplace(symbol_value, symbol_sptr);
766  if (!result.second)
767  // A symbol with the same address already exists. This
768  // means this symbol is an alias of the main symbol with
769  // that address. So let's register this new alias as such.
770  result.first->second->get_main_symbol()->add_alias(symbol_sptr);
771  }
772 
773  // Please note that update_function_entry_address_symbol_map depends
774  // on the symbol aliases been setup. This is why, the
775  // elf_symbol::add_alias call is done above BEFORE this point.
776  if ((is_ppc64 || is_ppc32) && symbol_sptr->is_function())
777  update_function_entry_address_symbol_map(elf_handle, elf_symbol,
778  symbol_sptr);
779 
780  return symbol_value;
781 }
782 
783 /// Update the function entry symbol map to later allow lookups of this symbol
784 /// by entry address as well. This is relevant for ppc64 ELFv1 binaries.
785 ///
786 /// For ppc64 ELFv1 binaries, we need to build a function entry point address
787 /// -> function symbol map. This is in addition to the function pointer ->
788 /// symbol map. This is because on ppc64 ELFv1, a function pointer is
789 /// different from a function entry point address.
790 ///
791 /// On ppc64 ELFv1, the DWARF DIE of a function references the address of the
792 /// entry point of the function symbol; whereas the value of the function
793 /// symbol is the function pointer. As these addresses are different, if I we
794 /// want to get to the symbol of a function from its entry point address (as
795 /// referenced by DWARF function DIEs) we must have the two maps I mentionned
796 /// right above.
797 ///
798 /// In other words, we need a map that associates a function entry point
799 /// address with the symbol of that function, to be able to get the function
800 /// symbol that corresponds to a given function DIE, on ppc64.
801 ///
802 /// The value of the function pointer (the value of the symbol) usually refers
803 /// to the offset of a table in the .opd section. But sometimes, for a symbol
804 /// named "foo", the corresponding symbol named ".foo" (note the dot before
805 /// foo) which value is the entry point address of the function; that entry
806 /// point address refers to a region in the .text section.
807 ///
808 /// So we are only interested in values of the symbol that are in the .opd
809 /// section.
810 ///
811 /// @param elf_handle the ELF handle to operate on
812 ///
813 /// @param native_symbol the native Elf symbol to update the entry for
814 ///
815 /// @param symbol_sptr the internal symbol to associte the entry address with
816 void
817 symtab::update_function_entry_address_symbol_map(
818  Elf* elf_handle, GElf_Sym* native_symbol, const elf_symbol_sptr& symbol_sptr)
819 {
820  const GElf_Addr fn_desc_addr = native_symbol->st_value;
821  const GElf_Addr fn_entry_point_addr =
822  elf_helpers::lookup_ppc64_elf_fn_entry_point_address(elf_handle,
823  fn_desc_addr);
824 
825  const std::pair<addr_symbol_map_type::const_iterator, bool>& result =
826  entry_addr_symbol_map_.emplace(fn_entry_point_addr, symbol_sptr);
827 
828  const addr_symbol_map_type::const_iterator it = result.first;
829  const bool was_inserted = result.second;
830  if (!was_inserted
831  && elf_helpers::address_is_in_opd_section(elf_handle, fn_desc_addr))
832  {
833  // Either
834  //
835  // 'symbol' must have been registered as an alias for
836  // it->second->get_main_symbol()
837  //
838  // Or
839  //
840  // if the name of 'symbol' is foo, then the name of it2->second is
841  // ".foo". That is, foo is the name of the symbol when it refers to the
842  // function descriptor in the .opd section and ".foo" is an internal name
843  // for the address of the entry point of foo.
844  //
845  // In the latter case, we just want to keep a reference to "foo" as .foo
846  // is an internal name.
847 
848  const bool two_symbols_alias =
849  it->second->get_main_symbol()->does_alias(*symbol_sptr);
850  const bool symbol_is_foo_and_prev_symbol_is_dot_foo =
851  (it->second->get_name() == std::string(".") + symbol_sptr->get_name());
852 
853  ABG_ASSERT(two_symbols_alias
854  || symbol_is_foo_and_prev_symbol_is_dot_foo);
855 
856  if (symbol_is_foo_and_prev_symbol_is_dot_foo)
857  // Let's just keep a reference of the symbol that the user sees in the
858  // source code (the one named foo). The symbol which name is prefixed
859  // with a "dot" is an artificial one.
860  entry_addr_symbol_map_[fn_entry_point_addr] = symbol_sptr;
861  }
862 }
863 
864 /// Fill up the lookup maps with alternative keys
865 ///
866 /// Due to special features like Control-Flow-Integrity (CFI), the symbol
867 /// lookup could be done indirectly. E.g. enabling CFI causes clang to
868 /// associate the DWARF information with the actual CFI protected function
869 /// (suffix .cfi) instead of with the entry symbol in the symtab.
870 ///
871 /// This function adds additional lookup keys to compensate for that.
872 ///
873 /// So far, this only implements CFI support, by adding addr->symbol pairs
874 /// where
875 /// addr : symbol value of the <foo>.cfi value
876 /// symbol : symbol_sptr looked up via "<foo>"
877 ///
878 /// @param elf_handle the ELF handle to operate on
879 void
880 symtab::add_alternative_address_lookups(Elf* elf_handle)
881 {
882  Elf_Scn* symtab_section = elf_helpers::find_symtab_section(elf_handle);
883  if (!symtab_section)
884  return;
885  GElf_Shdr symtab_sheader;
886  gelf_getshdr(symtab_section, &symtab_sheader);
887 
888  const size_t number_syms =
889  symtab_sheader.sh_size / symtab_sheader.sh_entsize;
890 
891  Elf_Data* symtab = elf_getdata(symtab_section, 0);
892 
893  for (size_t i = 0; i < number_syms; ++i)
894  {
895  GElf_Sym *sym, sym_mem;
896  sym = gelf_getsym(symtab, i, &sym_mem);
897  if (!sym)
898  {
899  std::cerr << "Could not load symbol with index " << i
900  << ": Skipping alternative symbol load.\n";
901  continue;
902  }
903 
904  const char* const name_str =
905  elf_strptr(elf_handle, symtab_sheader.sh_link, sym->st_name);
906 
907  // no name, no game
908  if (!name_str)
909  continue;
910 
911  const std::string name = name_str;
912  if (name.empty())
913  continue;
914 
915  // Add alternative lookup addresses for CFI symbols
916  static const std::string cfi = ".cfi";
917  if (name.size() > cfi.size()
918  && name.compare(name.size() - cfi.size(), cfi.size(), cfi) == 0)
919  // ... name.ends_with(".cfi")
920  {
921  const auto candidate_name = name.substr(0, name.size() - cfi.size());
922 
923  auto symbols = lookup_symbol(candidate_name);
924  // lookup_symbol returns a vector of symbols. For this case we handle
925  // only the case that there has been exactly one match. Otherwise we
926  // can't reasonably handle it and need to bail out.
927  ABG_ASSERT(symbols.size() <= 1);
928  if (symbols.size() == 1)
929  {
930  const auto& symbol_sptr = symbols[0];
931  setup_symbol_lookup_tables(elf_handle, sym, symbol_sptr);
932  }
933  }
934  }
935 }
936 
937 /// Collect the names of the variable and function symbols that are
938 /// undefined. Cache those names into sets to speed up their lookup.
939 ///
940 /// Once the names are cached into sets, subsequent invocations of
941 /// this function are essentially a no-op.
942 void
943 symtab::collect_undefined_fns_and_vars_linkage_names()
944 {
945  if (!cached_undefined_symbol_names_)
946  {
947  {
948  symtab_filter f = make_filter();
949  f.set_variables(false);
950  f.set_functions(true);
951  f.set_public_symbols(false);
952  f.set_undefined_symbols(true);
953  for (auto sym : filtered_symtab(*this, f))
954  undefined_function_linkage_names_.insert(sym->get_name());
955  }
956 
957  {
958  symtab_filter f = make_filter();
959  f.set_variables(true);
960  f.set_functions(false);
961  f.set_public_symbols(false);
962  f.set_undefined_symbols(true);
963  for (auto sym : filtered_symtab(*this, f))
964  undefined_variable_linkage_names_.insert(sym->get_name());
965  }
966  }
967  cached_undefined_symbol_names_ = true;
968 }
969 } // end namespace symtab_reader
970 } // end namespace abigail
bool is_variable() const
Test if the current instance of elf_symbol is a variable symbol or not.
Definition: abg-ir.cc:2299
static elf_symbol_sptr create(const environment &e, size_t i, size_t s, const string &n, type t, binding b, bool d, bool c, const version &ve, visibility vi, bool is_in_ksymtab=false, const abg_compat::optional< uint32_t > &crc={}, const abg_compat::optional< std::string > &ns={}, bool is_suppressed=false)
Factory of instances of elf_symbol.
Definition: abg-ir.cc:2063
void set_functions(bool new_value=true)
Enable or disable function filtering.
Helper class to allow range-for loops on symtabs for C++11 and later code. It serves as a proxy for t...
void set_undefined_symbols(bool new_value=true)
Enable or disable undefined symbol filtering.
symtab is the actual data container of the symtab_reader implementation.
This contains the declarations for the symtab reader.
std::vector< elf_symbol_sptr > elf_symbols
Convenience typedef for a vector of elf_symbol.
Definition: abg-ir.h:942
symtab_filter make_filter() const
Obtain a suitable default filter for iterating this symtab object.
Toplevel namespace for libabigail.
bool is_defined() const
Test if the current instance of elf_symbol is defined or not.
Definition: abg-ir.cc:2252
bool is_public() const
Test if the current instance of elf_symbol is public or not.
Definition: abg-ir.cc:2274
Abstraction of an elf symbol.
Definition: abg-ir.h:960
This is an abstraction of the set of resources necessary to manage several aspects of the internal re...
Definition: abg-ir.h:147
shared_ptr< elf_symbol > elf_symbol_sptr
A convenience typedef for a shared pointer to elf_symbol.
Definition: abg-ir.h:924
#define ABG_ASSERT(cond)
This is a wrapper around the 'assert' glibc call. It allows for its argument to have side effects...
Definition: abg-fwd.h:1743
bool matches(const elf_symbol &symbol) const
Determine whether a symbol is matching the filter criteria of this filter object.
void update_main_symbol(GElf_Addr addr, const std::string &name)
Notify the symtab about the name of the main symbol at a given address.
void set_public_symbols(bool new_value=true)
Enable or disable public symbol filtering.
bool is_function() const
Test if the current instance of elf_symbol is a function symbol or not.
Definition: abg-ir.cc:2290
void set_variables(bool new_value=true)
Enable or disable variable filtering.
const elf_symbols & lookup_symbol(const std::string &name) const
Get a vector of symbols that are associated with a certain name.
This contains a set of ELF utilities used by the dwarf reader.
void set_kernel_symbols(bool new_value=true)
Enable or disable kernel symbol filtering.
bool is_in_ksymtab() const
Getter of the 'is-in-ksymtab' property.
Definition: abg-ir.cc:2313
The symtab filter is the object passed to the symtab object in order to iterate over the symbols in t...
shared_ptr< string_elf_symbols_map_type > string_elf_symbols_map_sptr
Convenience typedef for a shared pointer to string_elf_symbols_map_type.
Definition: abg-ir.h:951