/*
 * symsnarf.c - Code for snarfing (aka retrieving) debug symbol info
 * on program addresses.  Uses libdl for getting basic info and libbfd for
 * getting debugging info.
 *
 * refdbg - GObject refcount debugger
 * Copyright (C) 2004-2006 Josh Green <jgreen@users.sourceforge.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA or point your web browser to http://www.gnu.org.
 */
#define _GNU_SOURCE             // For dladdr() and Dl_info structure

#include <string.h>
#include <dlfcn.h>
#include <bfd.h>
#include <glib.h>

#include "refdbg_priv.h"


/* FIXME - Is there a better way to determine how to handle object offsets?
 *
 * Flag bit tested against the bfd->flags field of an opened object file.
 * A slight hack to determine whether an ELF object is dynamic or not.  If it
 * is dynamic, then the addresses libbfd works with start at 0 and a program
 * address must first be converted to an offset from the object's base address.
 * If it's not dynamic, then it is an executable and addresses are absolute.
 *
 * NOTE(review): the value appears to mirror libbfd's own DYNAMIC flag from
 * bfd.h - confirm, and consider using that symbol directly. */
#define ELF_DYNAMIC  0x40


/* Per-object-file libbfd state, stored as the values of bfd_hash.  Both fields
 * are NULL when the object file could not be opened or its symbol table could
 * not be read (the entry is still kept so the load is not retried). */
typedef struct
{
  bfd *abfd;                    /* open libbfd handle to the object file, or NULL */
  asymbol **syms;               /* g_malloc'ed symbol table filled in by bfd_canonicalize_symtab() */
} BfdInfo;


/* symbol info cache keyed by program address (address => AddrInfo);
 * created by refdbg_sym_snarf_init() */
static GHashTable *sym_hash;

/* libbfd state cache keyed by object file name (object_file_name => BfdInfo);
 * created by refdbg_sym_snarf_init() */
static GHashTable *bfd_hash;


/* value destroy callback for sym_hash: hands the AddrInfo back to the slice
 * allocator it was obtained from */
static void
sym_hash_val_destroy (gpointer data)
{
  AddrInfo *addrinfo = data;

  g_slice_free (AddrInfo, addrinfo);
}

/* value destroy callback for bfd_hash: releases everything a BfdInfo owns
 * (the symbol table and the open libbfd handle) and then the BfdInfo itself.
 * Either field may be NULL if loading the object file failed.
 *
 * Strings obtained through this handle (e.g. by bfd_find_nearest_line) belong
 * to libbfd, so they must not be used once the handle has been closed. */
static void
bfd_hash_val_destroy (gpointer data)
{
  BfdInfo *bfdinfo = data;

  if (!bfdinfo)
    return;

  /* symbol table was allocated with g_malloc; g_free accepts NULL */
  g_free (bfdinfo->syms);
  bfdinfo->syms = NULL;

  if (bfdinfo->abfd) {
    bfd_close (bfdinfo->abfd);
    bfdinfo->abfd = NULL;
  }

  g_slice_free (BfdInfo, bfdinfo);
}

/*
 * refdbg_sym_snarf_init:
 *
 * Prepare the symbol snarfing system for use: initializes libbfd and creates
 * the address cache and the object file cache.  Only the first call does any
 * work; subsequent calls return immediately.
 *
 * Returns: TRUE (the current implementation has no failure path)
 */
gboolean
refdbg_sym_snarf_init (void)
{
  static gboolean done = FALSE;

  if (!done) {
    done = TRUE;

    bfd_init ();

    /* keys are raw addresses (default direct hashing), values are AddrInfo */
    sym_hash = g_hash_table_new_full (NULL, NULL, NULL, sym_hash_val_destroy);

    /* keys are object file name strings (not owned), values are BfdInfo */
    bfd_hash = g_hash_table_new_full (g_str_hash, g_str_equal,
        NULL, bfd_hash_val_destroy);
  }

  return (TRUE);
}

/*
 * refdbg_sym_snarf:
 * @addr: Address to get info on
 *
 * Get debug symbol information for a given program address.
 *
 * Returns: Cached symbol information or NULL on error
 */
AddrInfo *
refdbg_sym_snarf (void *addr)
{
  bfd_vma ofs;
  AddrInfo *info;
  BfdInfo *bfdinfo;
  asection *secp;
  Dl_info dlinfo;
  bfd_size_type size;
  bfd_vma start;
  const char *fname, *func;
  unsigned int line;
  long allocsize;

  /* see if the given address has already been cached */
  info = g_hash_table_lookup (sym_hash, addr);
  if (info)
    return (info);

  info = g_slice_new0 (AddrInfo);

  if (dladdr (addr, &dlinfo)) {
    info->obj_fname = dlinfo.dli_fname;
    info->obj_addr = dlinfo.dli_fbase;
    info->func = dlinfo.dli_sname;
    info->func_addr = dlinfo.dli_saddr;
  }

  /* add new info to hash */
  g_hash_table_insert (sym_hash, addr, info);

  /* can't get symbol info if no object file name or addr is lower than object
     base address */
  if (!info->obj_fname || addr < info->obj_addr)
    return (info);              /* not a complete failure, just no source fname:line */

  /* see if symbols already loaded for library */
  bfdinfo = g_hash_table_lookup (bfd_hash, info->obj_fname);

  if (!bfdinfo) {               /* load object file with libbfd if we haven't already */
    char *allocstr = NULL;

    /* create new BfdInfo and add to bfd_hash (if things fail, don't try again) */
    bfdinfo = g_slice_new0 (BfdInfo);   /* init to NULLs, in case of failure  */
    g_hash_table_insert (bfd_hash, (gpointer) (info->obj_fname), bfdinfo);

    fname = info->obj_fname;

    /* non-absolute path likely means its the main executable */
    if (!g_path_is_absolute (info->obj_fname)) {        /* search in path */
      allocstr = g_find_program_in_path (info->obj_fname);
      if (allocstr)
        fname = allocstr;
    }

    bfdinfo->abfd = bfd_openr (fname, NULL);    /* open the file with libbfd */
    g_free (allocstr);

    if (!bfdinfo->abfd)
      return (info);            /* failed to open the object file */

    /* Run the magical bfd_check_format() - you no fucking run you get crash!
     * Get the size of syms to allocate */
    if (!bfd_check_format (bfdinfo->abfd, bfd_object)
        || (allocsize = bfd_get_symtab_upper_bound (bfdinfo->abfd)) <= 0) {
      bfd_close (bfdinfo->abfd);
      bfdinfo->abfd = NULL;
      return (info);            /* error or no symbols? */
    }

    bfdinfo->syms = (asymbol **) g_malloc (allocsize);  /* alloc for syms */

    /* process symbol table - something screwy happened? */
    if (bfd_canonicalize_symtab (bfdinfo->abfd, bfdinfo->syms) <= 0) {
      g_free (bfdinfo->syms);
      bfd_close (bfdinfo->abfd);
      bfdinfo->abfd = NULL;
      bfdinfo->syms = NULL;
      return (info);
    }
  }

  if (!bfdinfo->abfd)
    return (info);

  /* If dynamic flag is set (for all library objects), then convert to offset.
   * Absolute addresses are used for executables. */
  if (bfdinfo->abfd->flags & ELF_DYNAMIC)
    ofs = addr - info->obj_addr;
  else
    ofs = addr - (void *) 0;

  /* loop on BFD sections looking for the one which contains addr */
  for (secp = bfdinfo->abfd->sections; secp != NULL; secp = secp->next) {       /* skip sections which aren't actually loaded into memory */
    if (!(bfd_get_section_flags (bfdinfo->abfd, secp) & SEC_ALLOC))
      continue;

    /* get start address of section, skip if addr is below it */
    start = bfd_get_section_vma (bfdinfo->abfd, secp);
    if (ofs < start)
      continue;

    /* get size of section, skip if addr is past the end */
    size = bfd_get_section_size (secp);
    if (ofs >= start + size)
      continue;

    if (bfd_find_nearest_line (bfdinfo->abfd, secp, bfdinfo->syms,
            ofs - start, &fname, &func, &line)) {
      info->src_fname = fname;
      if (func)
        info->func = func;      /* also set by dladdr */
      info->src_line = line;

      /* if func_addr was not found by dladdr and we have a func name now,
       * try looking it up with libbfd.
       * FIXME - Is there a more efficient way to lookup a sym with libbfd? */
      if (!info->func_addr && info->func) {
        asymbol **asymp;

        for (asymp = bfdinfo->syms; *asymp; asymp++) {  /* does symbol match? */
          if (strcmp (bfd_asymbol_name (*asymp), info->func) == 0) {
            info->func_addr = bfd_asymbol_value (*asymp) + (void *) 0;
            break;
          }
        }
      }
    }

    break;
  }

  return (info);
}

/*
 * refdbg_sym_hash_size:
 *
 * Report how many addresses currently have symbol info cached.
 *
 * Returns: Number of entries in the symbol hash
 */
guint
refdbg_sym_hash_size (void)
{
  guint count;

  count = g_hash_table_size (sym_hash);
  return (count);
}
