aboutsummaryrefslogblamecommitdiff
path: root/decomp/data.c
blob: 034a688e21f1a0b79c8c05a200fc7d985ce9d995 (plain) (tree)




















































































































































































































































                                                                               
/* data.c
 * This file is part of Decomp - a decompiler.  In this file are
 * routines for disassembling data sections from an object file.
 *
 * Copyright (C) 2001, Jonathan duSaint <dusaint@earthlink.net>
 *
 * Started around 1 December 2001.
 */

#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <ctype.h>

#include "decomp.h"


/* Whether or not to align the datum size.
 * print_symbol clears this before sizing each datum; find_size sets it to 1
 * when its scan stops at an address that has an entry in the symbol table.
 * While it is nonzero, print_symbol does not round a guessed size up to the
 * section alignment. */
int unalign;


/* find_size
 * Guess the size of the datum at OFFSET.
 *
 * DATA is indexed by section offset and BASE is added to an offset to form
 * the key looked up in SYMTAB.  LIMIT is treated as the number of readable
 * bytes in DATA.  The scan starts at the byte after OFFSET and stops at the
 * first zero byte, at the first offset for which SYMTAB has an entry (the
 * global `unalign' is then set to 1), or at LIMIT, whichever comes first.
 * A terminating zero byte is not counted.
 *
 * Returns the distance from OFFSET to the stopping point; this is at
 * least 1.
 */
unsigned long
find_size (hash_t symtab, char *data, unsigned long base,
	   unsigned long offset, unsigned long limit)
{
  unsigned long seek;

  /* check for a zero byte or for another symbol, but never read past
     the end of the section data */
  for (seek = offset + 1; seek < limit && data[seek]; seek++)
    {
      if (hash_get (symtab, base + seek) != NULL)
	{
	  unalign = 1;
	  break;
	}
    }

  return seek - offset;
}


/* check_is_string
 * Determine if STR, which is LEN bytes long, is a string or not, using
 * a heuristic.
 *
 * STR counts as a string when LEN is at least 2, a NUL sits either in the
 * last byte (STR[LEN - 1]) or directly after it (STR[LEN]), and every byte
 * before that NUL is a graphic or whitespace character.
 *
 * Note that STR[LEN] is read whenever the last byte is not NUL, so the
 * caller must make LEN + 1 bytes readable.
 *
 * Returns 1 if STR looks like a string, 0 otherwise.
 */
int
check_is_string (char *str, unsigned long len) 
{
  unsigned long k;

  if (len < 2) return 0;

  /* the terminator must be the last byte or the one right after it */
  if (str[len - 1] != '\0' && str[len] != '\0') return 0;

  for (k = 0; k < len; k++)
    {
      /* <ctype.h> functions need a value representable as unsigned char */
      unsigned char c = (unsigned char) str[k];

      /* a NUL is acceptable only as the very last byte */
      if (k == len - 1 && c == '\0') break;

      if (!isgraph (c) && !isspace (c)) return 0;
    }

  return 1;
}

/* escape_string
 * Convert chars that need escaping into an escape.  The returned string
 * must be freed.
 *
 * Form feed, newline, carriage return, tab, vertical tab, double quote and
 * backslash are each replaced by a backslash followed by one character;
 * every other char is copied unchanged.  STR must be NUL-terminated.  The
 * result is allocated with xmalloc and is NUL-terminated.
 */
char *
escape_string (char *str)
{
  unsigned long len = strlen (str);	/* measured once, not per iteration */
  unsigned long k, o = 0;
  char *esc;

  /* worst case: every char turns into a two-char escape, plus the NUL */
  esc = xmalloc (len * 2 + 1);

  for (k = 0; k < len; k++)
    {
      char code = '\0';	/* second char of the escape, if one is needed */

      switch (str[k])
	{
	case '\f': code = 'f';  break;
	case '\n': code = 'n';  break;
	case '\r': code = 'r';  break;
	case '\t': code = 't';  break;
	case '\v': code = 'v';  break;
	case '"':  code = '"';  break;
	case '\\': code = '\\'; break;
	default:
	  break;
	}

      if (code != '\0')
	{
	  esc[o++] = '\\';
	  esc[o++] = code;
	}
      else
	esc[o++] = str[k];
    }

  /* terminate explicitly; the buffer contents are otherwise unspecified */
  esc[o] = '\0';

  return esc;
}

/* print_symbol
 * Print the named object at VADDR.
 *
 * SYM describes the object; when SYM->size is 0 the size is guessed with
 * find_size.  DATA is indexed by section offset, *OFFSET is where the
 * object starts and LIMIT is treated as the number of readable bytes in
 * DATA.  LOAD_ADDR and SYMTAB are only passed on to find_size.
 *
 * The object is written to OFP either as a `.string' (when check_is_string
 * accepts it) or as `.byte' / `.hword' / `.int' / `.quad' data.  In the
 * second case a guessed size is rounded up to a multiple of ALIGN unless
 * find_size stopped at another symbol.  The size is never allowed to reach
 * past LIMIT.  A datum that runs up to LIMIT is treated as being followed
 * by a NUL when it is checked for being a string.
 *
 * On return *OFFSET has been advanced by the number of bytes consumed.
 */
void
print_symbol (hash_t symtab, struct symtab *sym, unsigned long load_addr,
	      unsigned long *offset, unsigned long limit,
	      unsigned long align, char *data, FILE *ofp)
{
  unsigned long k, size, padded, avail, ncopy;
  char *buff;

  /* nothing left to print; still end the line the caller started */
  if (*offset >= limit)
    {
      fputc ('\n', ofp);
      return;
    }
  avail = limit - *offset;

  unalign = 0;

  if (sym->size == 0)
    size = find_size (symtab, data, load_addr, *offset, limit);
  else
    size = sym->size;

  /* double check the size */
  if (size == 0) /* rare but possible */
    size = align;
  if (size == 0) /* ALIGN was 0 as well; always make progress */
    size = 1;
  if (size > avail) /* never read past the section data */
    size = avail;

  /* if we guessed the size and there isn't a symbol immediately after,
     this is the size the datum takes when it turns out not to be a
     string.  It is worked out before the copy so the buffer is big
     enough for either outcome. */
  padded = size;
  if (sym->size == 0 && !unalign && align > 1 && size % align)
    padded = size + (align - size % align);
  if (padded > avail)
    padded = avail;

  /* one extra byte: check_is_string looks at buff[size], and
     escape_string needs a terminator.  Where the section has a byte
     there it is the real one, otherwise it stays 0. */
  buff = xmalloc (padded + 1);
  memset (buff, 0, padded + 1);
  ncopy = (padded + 1 <= avail) ? padded + 1 : avail;
  memcpy (buff, data + *offset, ncopy);

/*   fprintf (ofp, "\t.size\t%ld\n", size); */

  if (check_is_string (buff, size))
    {
      /* NOTE(review): `.string' makes the assembler append a NUL.  When the
	 size was guessed, the NUL that ended the scan is not part of SIZE,
	 so the caller meets it again as a separate byte -- confirm whether
	 it should be consumed here. */
      char *esc = escape_string (buff);

      fprintf (ofp, "\t.string\t\"%s\"", esc);

      xfree (esc);
    }
  else
    {
      /* not a string: use the rounded-up size */
      size = padded;

      /* the values are fetched with memcpy rather than through a cast
	 pointer, and printed with the <inttypes.h> macros so the format
	 matches the type on every platform */
      switch (size)
	{
	case 1:
	  /* go through unsigned char so a byte >= 0x80 is not sign-extended */
	  fprintf (ofp, "\t.byte\t%#x", (unsigned int) (unsigned char) buff[0]);
	  break;
	case 2:
	  {
	    uint16_t v;

	    memcpy (&v, buff, sizeof v);
	    fprintf (ofp, "\t.hword\t%#" PRIx16, v);
	  }
	  break;
	case 4:
	  {
	    uint32_t v;

	    memcpy (&v, buff, sizeof v);
	    fprintf (ofp, "\t.int\t%#" PRIx32, v);
	  }
	  break;
	case 8:
	  {
	    uint64_t v;

	    memcpy (&v, buff, sizeof v);
	    fprintf (ofp, "\t.quad\t%#" PRIx64, v);
	  }
	  break;
	default:
	  /* anything else goes out as rows of eight bytes */
	  for (k = 0; k < size; k++)
	    {
	      if (!(k % 8)) fprintf (ofp, "\n\t.byte\t0x%02hhx", buff[k]);
	      else fprintf (ofp, ", 0x%02hhx", buff[k]);
	    }
	  break;
	}
    }

  fputc ('\n', ofp);

  xfree (buff);

  *offset += size;
}

/* decode_data_section
 * Disassemble a data section into named and unnamed chunks of data.
 *
 * The contents of section S are read from FI->fp and written to OFP as
 * assembler directives.  At every offset whose address (S->load_address
 * plus the offset) has an entry in SYMTAB, the symbol's directives and
 * label are printed and print_symbol emits its data; all other bytes are
 * written out as `.byte' rows.  ERR_READ_SECTION is reported through
 * error_out if the section cannot be read.
 */
void
decode_data_section (struct file_info *fi, struct section_info *s,
		     hash_t symtab, FILE *ofp)
{
  int need_to_print_header = 1;
  unsigned long offset = 0;
  struct symtab *sym;
  char *data;

  data = xmalloc (s->section_size);

  /* a failed seek is as fatal as a short read */
  if (fseek (fi->fp, s->section_offset, SEEK_SET) != 0
      || fread (data, sizeof (char), s->section_size, fi->fp)
	 < s->section_size)
    error_out (ERR_READ_SECTION);


  fprintf (ofp, "\n\n\t.section\t%s%s\n\t.align\t%d\n", s->section_name,
	   s->flags, s->align);

  for (offset = 0; offset < s->section_size;)
    {
      sym = hash_get (symtab, s->load_address + offset);

      if (sym != NULL)
	{
	  fputc ('\n', ofp);

	  /* if this sym is global, mark it */
	  if (sym->type == GLOBAL_DATA || sym->type == GLOBAL_UNKNOWN)
	    fprintf (ofp, "\n\t.global\t%s", sym->name);

	  /* if it's data, say so */
	  if (sym->type == GLOBAL_DATA || sym->type == LOCAL_DATA
	      || sym->type == UNKNOWN_DATA)
	    fprintf (ofp, "\n\t.type\t%s,@object", sym->name);

	  if (sym->size != 0) fprintf (ofp, "\n\t.size\t%s, %ld", sym->name,
				       sym->size);

	  fprintf (ofp, "\n%s:", sym->name);

	  /* print_symbol advances OFFSET past the datum */
	  print_symbol (symtab, sym, s->load_address, &offset, s->section_size,
			s->align, data, ofp);

	  /* skip to next aligned address */
	  /* this doesn't seem to be a good idea - not sure why */
	  /* maybe it's because data isn't always aligned in the object file */
/* 	  while ((s->load_address + offset) % s->align) offset++; */

	  /* the next unnamed byte has to start a fresh `.byte' row */
	  need_to_print_header = 1;
	}
      else /* no data - just print out the section contents */
	{
	  char byte = data[offset++];

	  /* NOTE(review): OFFSET has already been advanced here, so the row
	     break is decided by the address of the following byte --
	     confirm that this is intended. */
	  if (!((s->load_address + offset) % 8) || need_to_print_header)
	    {
	      fprintf (ofp, "\n\t.byte\t0x%02hhx", byte);
	      need_to_print_header = 0;
	    }
	  else
	    fprintf (ofp, ", 0x%02hhx", byte);
	}
    }

  /* the section copy is no longer needed once everything is printed */
  xfree (data);
}