#!/bin/bash
#
# vectool version 0.7  2001-09-24
# vectool: Automate creation and maintenance of shared libs at fixed addresses.
#   Copyright 2001 BitWagon Software LLC.  All rights reserved.
#   This file may be used, modified, and distributed under the terms
#   of the GNU General Public License version 2.
#
# Usage:
#   vectool --mode=prog <libtool-$link_command>
#   vectool --mode=lib  --addrfile=<file> --rpathdir=<dir> 
#        --exclexp="patterns" \
#        --exclrel="patterns" \
#        --inclrel="patterns" \
#       <libtool-$archive_cmds-for-shared>
# such as for kdelibs-2.2/libtool when --mode==link, near line 3595:
#        for cmd in $cmds; do
#          IFS="$save_ifs"
#          $show "$cmd"
#          $run eval vectool --mode=lib \
#            --addrfile=/home/kde-2.2/KDE_SHLIB_ADDRS \
#            --rpathdir=/home/kde-2.2/KDE_RPATHDIR \
#            "$cmd" || exit $?
#        done
# For example,
#   vectool --mode=lib --addrfile=KDE_SHLIB_ADDRS --rpathdir=KDE_RPATHDIR \
#       g++ -shared *.o -L... -l... -Wl,... -o libfoo.so.1
#
# The --addrfile=<file> is a pathname to a file which accumulates
# the fixed address and soname of each library as it is built.
# This file must be preserved to support maintenance
# (rebuilds, bugfixes, etc.) of the libraries that are built.
#
# The --rpathdir=<dir> is a pathname to a directory which accumulates
# symbolic links to each library and <soname>.exp as it is built.
# [libtool 1.4 does not understand how to run ldd correctly, and we
# must find dependent libraries that have been built but not yet installed.
# An effort was made to fix/integrate/cooperate libtool and vectool,
# but working with libtool 1.4, which decided to avoid shellscript functions
# in order to be as portable as possible, was just too difficult.]
#
# The --exclexp="patterns" is a list of regex patterns to exclude symbols
# from the export vector.  The default is '^__V_ ^_init$ ^_fini$'.
# See the documentation for expvec.
# The --exclrel and --inclrel are regex patterns for finalizing relocation
# in the output shared library.  See the docs for relvec.  Defaults are
#   --inclrel='.'
#   --exclrel='^__ti ^__environ$ ^__ctype_'
#
# The remaining parameters are shell (libtool) commands.
#
# The export vector file is named <soname>.exp ; compiled to <soname>.exp.o .
# The loader script file is named <soname>.lds .
#
# Requires: expvec impvec relvec rpathrm
# Requires: bash cat dirname ed egrep gcc gdb grep ld ldd mv objdump rm sed size
#
# Methodology: Use expvec to build a normal, trial version of the output shared
# library with variable runtime address, and construct the export vector.
# Use ldd to make sure that all dependencies can be found.  If some unfound
# dependency is at a fixed address, then there will be trouble at runtime.
# Construct an import vector using the export vectors (if any) of the
# NEEDED first-level dependencies (found by objdump -p).
# See how much address space this library uses now, and calculate the fixed
# base address.  Find the upper bound from the minimum of the standard ELF
# main (0x08048000) and all other libraries with fixed addresses, whether
# the libraries are dependents or not.  (The _next_ module could depend
# on any subset of the libraries, so no overlap is allowed.)
# If there is a base address from last time, then see if the library still
# fits between it and the upper bound.  If there is no old base, then
# calculate a base which allows some room for expansion (33%, plus a
# few pages), but otherwise abuts the upper bound.  Store the base in a
# loader script and the file of assigned addresses.  Then rebuild the library
# with fixed base address using the loader script.
#
# [The export vector resides in a read-only PT_LOAD, and each symbol
# in the vector will have a R_386_PC32 relocation because the ld policy for
# ET_DYN files is to allow superseding.  However, we want those symbols
# always to resolve to the local definitions, so afterwards we run the
# utility 'relvec' to perform the relocations once and for all, and
# remove the DT_TEXTREL mark from the _DYNAMIC section.  The net effect
# is much like using -Bsymbolic for symbols in the export vector.
# In fact, using -Bsymbolic often works.  But it eagerly binds all the
# other relocations, too, and sometimes that is not what is wanted.
# Also, when the ld in binutils 2.9.11 sees -Bsymbolic, then it always
# complains for every locally-undefined symbol (even for symbols that are
# defined in listed dependent libraries), and gives non-zero exit status
# if there are any at all.  This makes it impossible to separate true
# errors from the expected undefined symbols, which interferes with the
# normal use of 'make'.  ld should untangle its features.]
#
# Thus the libraries with fixed addresses will descend from 0x08048000,
# which allows 128MB of space.  Everything is automatic as long as each
# library continues to fit within its assigned space.  When some library
# gets too big (this _is_ detected), then manual intervention is needed.
# Just delete the .lds loader script and the address assignment for the
# culprit, then rerun vectool.  Vectool will assign a new address that
# is lower than any other.  This leaves a "hole", but it is also just
# more room for the library immediately below.  An alternate fix is
# to re-assign enough unused expansion space from the library below.
#
# Adjust the room for expansion according to experience.
# [Hint: consider starting with the debug, non-optimized versions!]
#

# Tracing switch.  SET_X holds the command that turns shell tracing on; it is
# eval'ed here and again after each "set - ..." later in the script.
# The second assignment overrides the first, so tracing is currently enabled;
# remove or comment out the "set -x" assignment for production use.
SET_X=""         # production use
SET_X="set -x"   # debugging: we use "set - ...", which clears -x
PS4='+$LINENO '  # makes "set -x" traceable
eval $SET_X      # makes "set -x" persist when debugging

# succeed_or_die LABEL CMD [ARG...]
#   Run CMD with its ARGs.  Return 0 when it succeeds; otherwise print
#   "failed:<status> <script>:<LABEL>: <command>" on stderr and exit the
#   shell with CMD's status.
#   LABEL identifies the call site (callers pass $LINENO, optionally with
#   a suffix).
function succeed_or_die() {  # line cmds...
# redirection at call site has already happened!
  # Keep the bookkeeping variables private: callers (load_with_imports) use
  # a variable named "line" themselves, and a global assignment here would
  # overwrite it and make successive labels pile up ("N-a", "N-a-b", ...).
  local line rv
  line="$1"; shift
  "$@"; rv=$?
  if test $rv -eq 0; then
    return 0
  fi
  echo failed:$rv "$0":"$line": "$@"  >&2
  exit $rv
}

# get_export_vectors PATH
#   Print, one per line, every readable "<file>.exp" that sits next to a
#   first-level (DT_NEEDED) dependency of the ELF file PATH, where <file> is
#   the location the dynamic loader resolves that dependency to.
#   Writes a diagnostic to stderr and exits with status 1 when PATH cannot be
#   read, when the loader fails, or when a dependency is "not found".
#   Both call sites capture the output with $( ), so that "exit" ends only
#   the command substitution; the callers do not test its status.
function get_export_vectors() {  # path
  local path la_path needed_pats lddout rv bad
  local key value rest needed arrow file addr
  path="$1"

  # Try to cooperate with libtool
  # NOTE(review): when a .la file is readable, it (not PATH) is what gets
  # handed to the dynamic loader below -- confirm that this is intended.
  case "$path" in
  *.so.*)
    la_path="${path%.so.*}.la"
  ;;
  *.so)
    la_path="${path%.so}.la"
  ;;
  *)
    if test -r "$path".la; then
      la_path="$path".la
    else
      la_path="$path"
    fi
  ;;
  esac

  # Recovery for non-libtool cases
  if test -r "$la_path"; then
    : # OK
  elif test -r "$path"; then
    la_path="$path"
  else
    echo failed:get_export_vectors "$path" 1>&2
    exit 1
  fi

  # Build one fixed-string pattern per soname listed as NEEDED in the
  # Dynamic Section.  Each pattern is "<TAB><soname> =>", the way the loader
  # prints a resolved dependency.  Fixed strings (grep -F) are used because
  # sonames routinely contain regex metacharacters: as an extended regex,
  # "libstdc++" would never match its own line.
  needed_pats=$(
    objdump -p "$path"  | (
      set +x
      while read key value rest
      do
        # operator '==' first appears in bash-2.05 (April 2001)
        # so we use '=' instead, which looks like a mistake to C programmers.
        if test "NEEDED" = "$key"; then
          printf '\t%s =>\n' "$value"
        fi
      done
    )
  )

  # Be sure that all dependencies are present and accounted for.
  # The loader needs a pathname containing a slash.
  case "$la_path" in
  */*)                      ;;
  *)   la_path=./"$la_path" ;;
  esac
  lddout=$(
    ## kernel bug (!) gives bad AT_PHDR for execve() of ET_DYN at fixed address
    ##      ldd                  "$la_path"
       /lib/ld-linux.so.2 --list "$la_path"
  ); rv=$?
  bad=$(echo "$lddout"  |  grep '=> not found')
  if test $rv -ne 0 || test -n "$bad"; then
    # Say something useful even when the loader failed without reporting
    # any "not found" line.
    echo "$0": "$path": \
      "${bad:-cannot list dependencies of $la_path (status $rv)}"  >&2
    exit 1
  fi

  # Use export vectors from first-level DT_NEEDED only.
  set +x
  # No NEEDED entries means no first-level dependencies: emit nothing
  # (an empty pattern would otherwise match every line of the listing).
  test -n "$needed_pats" || return 0
  echo "$lddout"  |
  grep -F -e "$needed_pats"  |
  while read needed arrow file addr rest
  do
    if test -r "$file".exp; then
      echo "$file".exp
    fi
  done
}

# load_with_imports LABEL OUTPUT VECFILES CMD [ARG...]
#   Relink OUTPUT together with an import vector:
#     1. run impvec on the export-vector files VECFILES (a whitespace-
#        separated list) and the link command, saving its output as
#        OUTPUT.imp.s;
#     2. compile that to OUTPUT.imp.o with gcc;
#     3. rerun the link command CMD ARG... with OUTPUT.imp.o appended.
#   Each step goes through succeed_or_die and is labelled LABEL-a, LABEL-b,
#   LABEL-c, so any failure terminates the script.
function load_with_imports() {  # line output vecfiles cmds...
  # The label lives in a private variable with its own name: succeed_or_die
  # assigns to a variable called "line", which used to overwrite ours and
  # turned the step labels into "N-a", "N-a-b", "N-a-b-c".
  local label output vecfiles
  label="$1"; shift
  output="$1"; shift
  vecfiles="$1"; shift

  # Construct import vector ($vecfiles is deliberately unquoted: it is a list)
  succeed_or_die "$label"-a impvec '^__pure_virtual$' $vecfiles "$@" \
     > "$output".imp.s

  # Compile import vector
  succeed_or_die "$label"-b gcc -c -o "$output".imp.o "$output".imp.s

  # Reload with import vector
  succeed_or_die "$label"-c "$@" "$output".imp.o

  # rm -f "$output".imp.o "$output".imp.s
}

# Scratch file for gdb command scripts ("gdb-5.0 -batch" does not read stdin).
# Let mktemp choose the name and create the file, so that it cannot be guessed
# from the process id and pre-created (e.g. as a symlink) by another user.
# When mktemp is missing or fails, fall back to the historical PID-based name.
if TMPF=$(mktemp /tmp/vectool.XXXXXX 2>/dev/null) && test -n "$TMPF"; then
  :  # the freshly created file reserves the name
else
  TMPF=/tmp/vectool.$$
  rm -f $TMPF*  # clear leftovers of an earlier process with the same pid
fi
# $TMPF is expanded now; the glob is expanded when the trap runs.
trap "rm -f $TMPF*" EXIT

# In Linux kernels 2.4.5 and 2.2.18, there is a bug which sets AT_PHDR
# to the bad value (load_bias + 2 * p_vaddr) instead of the good value
# (load_bias + p_vaddr) whenever an ET_DYN also requests PT_INTERP.
# The result is that applying 'ldd' to a shared-library output from vectool
# gets a SIGSEGV, which scares users.  The workaround is to run
# "/lib/ld-linux.so.2 --list ./<lib>" instead of "ldd <lib>"; the resolved
# .sonames will be correct, but the addresses will be different.
# vectool runs ldd only on ET_EXEC, and PT_INTERP is ignored when
# an ET_DYN is used as a library instead of a main program.  Still,
# it seems to be a good idea to avoid the SIGSEGV from ldd, if possible.
# The only way to inhibit gcc from forcing a PT_INTERP for ET_EXEC
# is to use a modified specs file:
#
# sed  > $TMPF.specs -e 's/%{!dynamic-linker:[^}]*}//' \
#   < $( (gcc -v 2>&1)  |  (read Reading specs from file; echo "$file") )
#
# However, ld supplies a default PT_INTERP of /usr/lib/libc.so.1 which
# does not exist, resulting in the even stranger
#   "/usr/bin/ldd: ./foo.so: No such file or directory"
# where the ENOENT is for the PT_INTERP but the message says the ET_DYN.
# So we need to get the kernel bug fixed:  fs/binfmt_elf.c >= 2.4.7
#

### libkdefakes.so has these symbols undefined.  It seems not to matter.
##cat >$TMPF.main.s <<EOF
##main: ret
##	.data
##_fp_hw: .long 0; .weak,_fp_hw
##
##	.weak main
##	.size main,1
##	.type main,@function
##EOF
##gcc -c -o $TMPF.main.o $TMPF.main.s
### end libkdefakes.so workaround


# All ".libs" are hold-overs from a failed attempt to integrate with
# libtool version 1.4.  (libtool 1.4 does not understand 'ldd' at all.)

# Parse args: delete -shared; find -o; add .libs to -rpath-link .
# Also add .libs to -rpath , so that ldd will work at build time.
# This is a bug, but necessary because prerequisite .so
# are not installed as they are built.
# ---- Defaults; the argument loop below may override most of them. ----

# Command line as given, flattened to one space-joined string; it is reused
# when vectool re-executes itself after creating a loader script.
original_args="$@"

# Values settable through vectool's own --name=value options.
mode='lib'
addrfile='VECTOOL_ADDRESSES'
rpathdir="$PWD"
exclexp='^__V_ ^_init$ ^_fini$'
exclrel='^__ti ^__environ$ ^__ctype_'
inclrel='.'

# Values picked up from the wrapped link command.
dasho='a.out'
ld_script=''
version_script=''
soname=''
so_path=''
export soname so_path

# Accumulators: rewritten command line, and the rpath entries vectool added.
new_args=''
added_rpath=''



# Walk the command line once.  vectool's own --name=value options are consumed
# and stored; every other word is appended to the string $new_args, in some
# cases after being rewritten.  $new_args is a flat string that is re-split
# into positional parameters after this loop, so words containing whitespace
# do not survive as single words.
set +x  # this part typically is long and uninteresting

# rpath_now is a one-shot flag.  It is raised while $new_args is still empty,
# i.e. during the iteration that appends the first forwarded word (presumably
# the compiler driver), and makes the NEXT iteration insert the
# -Wl,-rpath/-L options for $rpathdir right after that word.
rpath_now=''
while test $# -gt 0
do
  arg="$1"; shift

  case "$arg" in  # mode-switching parameters.  Note 'continue' back to 'while'.
    --addrfile=*)
      addrfile="${arg#--addrfile=}"
      continue
    ;;
    --exclexp=*)
      exclexp="${arg#--exclexp=}"
      continue;
    ;;
    --exclrel=*)
      exclrel="${arg#--exclrel=}"
      continue;
    ;;
    --inclrel=*)
      inclrel="${arg#--inclrel=}"
      continue;
    ;;
    --mode=*)
      mode="${arg#--mode=}"
      continue
    ;;
    --rpathdir=*)
      rpathdir="${arg#--rpathdir=}"
      continue
    ;;
  esac

  # Insert the build-time rpath once, directly after the first forwarded word.
  if test "1" = "$rpath_now"; then
    rpath_now=''
    # needed for our use of ldd
    new_args="$new_args -Wl,-rpath,$rpathdir -L$rpathdir"
    # added_rpath is a colon-separated record of what vectool added; it is
    # only reported at the end, as the argument for rpathrm.
    if test -z "$added_rpath"; then
      added_rpath="$rpathdir"
    else
      added_rpath="$added_rpath:$rpathdir"
    fi
  fi
  if test '' = "$new_args"; then
    rpath_now='1'
  fi

  # prog mode: forward everything unchanged, remembering only the -o operand.
  if test "prog" = "$mode"; then
    case "$arg" in
      -o)  # record output name
        dasho="$1"; shift
        new_args="$new_args -o $dasho"
      ;;
      *)
        new_args="$new_args $arg"
      ;;
    esac
    continue
  fi

  case "$arg" in  # only for lib mode
    -Wl,-rpath-link[,=]*)  # append .libs to this arg
      new_args="$new_args $arg:$PWD/.libs"
    ;;
    -Wl,-rpath-link | -rpath-link)  # append .libs to next arg
      new_args="$new_args $arg"
      arg="$1"; shift
      new_args="$new_args $arg:$PWD/.libs"
    ;;

    -Wl,-rpath[,=]*)  # append .libs to this arg
      new_args="$new_args $arg:$PWD/.libs"
      added_rpath="$added_rpath:$PWD/.libs"
    ;;
    -Wl,-rpath | -rpath)  # append .libs to next arg
      new_args="$new_args $arg"
      arg="$1"; shift
      new_args="$new_args $arg:$PWD/.libs"
      added_rpath="$added_rpath:$PWD/.libs"
    ;;

    # Remember a loader script given in the one-word form; the word itself
    # is forwarded unchanged.
    -Wl,--script[,=]*)
      ld_script="${arg##-Wl,--script?}"
      new_args="$new_args $arg"
    ;;

    # Record the soname, from either the one-word or the two-word form
    # (in the two-word form a leading "-Wl," is stripped from the operand).
    -Wl,-soname[,=]*)
      soname="${arg#-Wl,-soname?}"
      new_args="$new_args $arg"
    ;;
    -Wl,-soname | -soname)
      new_args="$new_args $arg"
      arg="$1"; shift
      soname="${arg#-Wl,}"
      new_args="$new_args $arg"
    ;;

    -Wl,--version-script[,=]*)
      version_script="${arg#-Wl,--version-script?}"
      new_args="$new_args $arg"
    ;;

    # "-T <script>" is removed from the forwarded command; only the script
    # name is kept, in $ld_script.
    -T)
      ld_script="$1"; shift
      ## will add back ld_script later
      # new_args="$new_args -T $ld_script"
    ;;

    -shared)
      new_args="$new_args -shared -Wl,--allow-shlib-undef"
    ;;

    -o)  # record so_path
      so_path="$1"; shift
      new_args="$new_args -o $so_path"
    ;;
    *)  # pass through
      new_args="$new_args $arg"
    ;;
  esac
done

# Install the rewritten command line as the positional parameters.
# "set -" also clears tracing, so re-apply whatever $SET_X asks for.
set - $new_args;  eval $SET_X

# Program mode: link normally, then, if any first-level dependency has an
# export vector, link again with a matching import vector.  Always ends the
# script here.
case "$mode" in
prog)
  succeed_or_die $LINENO "$@"  # normal load

  vecfiles=$( get_export_vectors "$dasho" )
  test -z "$vecfiles"  ||
    load_with_imports $LINENO "$dasho" "$vecfiles" "$@"

  exit 0
;;
esac

#
# Create shared library at fixed address.
#

# The export vector and the loader script are named after the soname and live
# in the directory that is named or implied by -o .
so_dir="$(dirname $so_path)"
export so_dir
exppath="$so_dir/$soname.exp"
if test -z "$ld_script"; then
  ld_script="$so_dir/$soname.lds"
fi

# A readable loader script is passed to the link, directly after the first
# word of the command.
if test ! -r "$ld_script"; then
  : use default ET_EXEC to compute sizes, and construct loader script later.
else
  first_word="$1"; shift
  set - "$first_word" -Wl,--script="$ld_script" "$@";  eval $SET_X
fi

# A user-supplied version script has to mention the export vector symbol
# (__V_ followed by the soname with every '.' turned into '_').  When it does
# not, edit the script in place and add the name after "global:".
if test -n "$version_script"; then
  vecname=__V_${soname//./_}
  if grep --quiet "$vecname" "$version_script"; then
    :  # already listed
  else
    ed "$version_script"  2>/dev/null  <<EOF
      /global:/s//& $vecname;/
      w
      q
EOF
  fi
fi

# Link together with the export vector.  Three situations:
#   no $exppath yet            -> let expvec generate one from scratch;
#   $exppath and $exppath.o    -> reuse the compiled vector as it is;
#   $exppath only              -> let expvec rebuild from the old listing.
# expvec writes the vector listing to stdout and the compiled vector to fd 3.
if test ! -r "$exppath"; then
  # New vector: include every function; exclude what $exclexp names
  # (by default vector names, _init and _fini); no special version script.
  echo "note: generating new export vector $exppath and $exppath.o"  >&2
  succeed_or_die $LINENO expvec '.'  \
    "$exclexp" '' "$@"  > "$exppath"  3> "$exppath".o
elif test -r "$exppath".o; then
  echo "note: using existing compiled export vector $exppath.o"  >&2
  succeed_or_die $LINENO "$@" "$exppath".o
else
  echo "note: rebuilding export vector, based on existing $exppath"  >&2
  succeed_or_die $LINENO mv "$exppath" "$exppath".old
  succeed_or_die $LINENO expvec --rebuild @"$exppath".old \
    "$exclexp" '' "$@"  > "$exppath"  3> "$exppath".o
fi
# Every later relink (import vectors included) must keep the compiled
# export vector on the command line.
set - "$@" "$exppath".o;  eval $SET_X


# Export vectors of the first-level dependencies of the trial library.
vecfiles=$( get_export_vectors "$so_path" )

# Avoid address conflict with _all_ other libraries with fixed addresses.
# min_addr stays 0 until an existing assignment for this soname is found;
# max_addr starts at the initial upper bound [128+ MB] for a library.
min_addr=0
# hex constants first appear in bash-2.05 (April 2001), so use decimal:
# gdb prints "$1 = <decimal>", of which the third word is kept.
echo "p 0x08048000" >$TMPF
max_addr=$( gdb -batch -n -x $TMPF  |  (read var eq value rest; echo $value) )
rm -f $TMPF
export max_addr

# Start an empty assignment file when none is readable.
test -r "$addrfile"  ||  {
  echo note: creating new file "$addrfile" of fixed address assignments  >&2
  > "$addrfile"
}

# scan_addrfile
#   Read "$addrfile" (lines of "<decimal-address> <soname> [anything]",
#   highest address first) and narrow the address window for "$soname":
#     - every entry that precedes ours lowers the global max_addr to its
#       address;
#     - our own entry, if present, sets the global min_addr and ends the scan.
#   Blank lines and lines starting with '#' are skipped.
#   Exits the script with status 1 if the addresses are not strictly
#   descending.
function scan_addrfile() {
  local addr name rest
  while read addr name rest
  do
    case "$addr" in
    '' | '#'*)
      # The pattern used to be the quoted string '#*', which matches only
      # the two characters "#*"; real comment lines fell through and ended
      # up in max_addr.
      continue  # ignore blank and comment lines
    ;;
    esac

    if test "$max_addr" -le "$addr"; then
      echo "$0": "$addrfile" is not in descending order: $max_addr $addr "$name"  >&2
      exit 1
    fi
    if test "$name" = "$soname"; then
      min_addr=$addr
      break;
    else
      max_addr=$addr  # depends on descending order
    fi
  done <"$addrfile"
}
scan_addrfile


# Two cases, decided by whether the loader script is readable:
#   - not readable: choose (or recover) the base address, record it, write a
#     loader script carrying that base, and re-execute vectool from scratch;
#   - readable: verify that the library still fits below the upper bound and
#     that the script and "$addrfile" agree on the base address.
# All arithmetic is delegated to gdb ("p <expr>" prints "$1 = <value>").
if test ! -r "$ld_script"; then
  # [re-]Construct first-time loader script.

  # Compute loose fit.
  # Take required size, add 3 pages (12KB), allow 33% extra room for expansion.
  # The sizes come from the second line of "size" output (text data bss);
  # the result is rounded down to a 4KB boundary (&~0xfff).
  best_addr=$(
    size "$so_path"  |  (
      read rest  # discard column labels
      read text data bss rest
      echo "p ($max_addr-(($text+$data+$bss+0x3fff)*4/3))&~0xfff" >$TMPF
      gdb -batch -n -x $TMPF  |  (read var eq value rest; echo $value)
      rm -f $TMPF
    )
  )

  if test $min_addr -eq 0; then  # true first time
    # No entry for this soname yet: append "<decimal>  <soname>  # <hex>".
    echo "p/x $best_addr" >$TMPF
    echo "$best_addr  $soname  # $(gdb -batch -n -x $TMPF | 
      (read var eq addr; echo $addr) )"  >>"$addrfile"
    rm -f $TMPF
  else
    echo $0: note: rebuilding "$ld_script" using base $min_addr \
      for "$soname" from "$addrfile"  >&2
    if test $min_addr -ne $best_addr; then
      # $ld_script got deleted, $soname was still in $addrfile,
      # and address assignment is no longer "optimal".
      echo "$0": warning: "$so_path": using base $min_addr instead of $best_addr  >&2
      best_addr=$min_addr  # use what $addrfile said
    fi
  fi

  # Recover the general script ("ld --verbose" and first two commands to sed).
  # Then alter the base address.
  ld --verbose -shared  |
  succeed_or_die $LINENO sed  > "$ld_script"  \
    -e '1,/=====/d' \
    -e '/=====/,$d' \
    -e "s/ 0 + SIZEOF_HEADERS/ $best_addr + SIZEOF_HEADERS/"

  # NOTE(review): $original_args is a flat string and is re-split here, so an
  # original argument that contained whitespace (e.g. a multi-pattern
  # --exclexp value) arrives as several words -- confirm whether that matters.
  exec $0 $original_args  # relink using newly-created $ld_script, and EXIT !
  # NOT REACHED
else  # Check for conformance to existing loader script.
  # Compute tight fit.
  # max_addr is replaced by the upper bound minus the page-rounded text size
  # and the page-rounded data+bss size, i.e. the highest base at which the
  # library still ends at or below the old upper bound.
  max_addr=$(
    size "$so_path"  |  (
      read rest  # discard column labels
      read text data bss rest
      echo "p $max_addr-((0xfff+$text)&~0xfff)-((0xfff+(0xfff&$text)+$data+$bss)&~0xfff)" > $TMPF
      gdb -batch -n -x $TMPF  |  (read var eq value rest; echo $value)
      rm -f $TMPF
    )
  )

  # Check that address fits, and matches existing loader script.
  if test $max_addr -lt $min_addr; then
    echo "$0": "$so_path": address conflict $max_addr '<' $min_addr >&2
    exit 1
  fi

  # Substitute min_addr into user-supplied ld_script, if necessary [glibc].
  # (Only a script that still has the unmodified " 0 + SIZEOF_HEADERS" is
  # edited, in place.)
  grep --quiet ' 0 + SIZEOF_HEADERS' "$ld_script" &&
    ed "$ld_script" 2>/dev/null <<EOF
      / 0 + SIZEOF_HEADERS/s// $min_addr + SIZEOF_HEADERS/
      w
      q
EOF

  # Base address as written in the script: on the first line containing
  # " + SIZEOF_HEADERS", the run of blanks/hex-digit characters that follows
  # the first '='.
  lds_addr=$(
    < "$ld_script"  sed -n -e '/ + SIZEOF_HEADERS/{
      s/[^=]*=\([ xX0-9A-Fa-f]*\).*/\1/p
      q
    }'
  )

  if test $lds_addr -ne $min_addr; then
    echo "$0": address conflict: $lds_addr in "$ld_script", $min_addr in $addrfile  >&2
    exit 1
  fi

  # Add to -rpath any SEARCH_DIR directory from the ld_script. This is
  # the usual way to get /lib, /usr/lib, /usr/local/lib searched at runtime.
  # Note: this parsing is only heuristic; lines with 'SEARCH_DIR'
  # must contain SEARCH_DIR commands only.
  for dir in $( < "$ld_script" sed -n -e '/SEARCH_DIR(\([^)]*\)) *;/s//\1/gp' )
  do
    set - "$@" -Wl,-rpath,"$dir";  eval $SET_X
    added_rpath="$added_rpath:$dir"
  done
fi


# Final link at the fixed address: with an import vector when any dependency
# supplied an export vector, otherwise simply again with the loader script.
if test -z "$vecfiles"; then
  # Reload using ld_script.
  succeed_or_die $LINENO "$@"
else
  load_with_imports $LINENO "$so_path" "$vecfiles" "$@"
fi

# Subsequent libtool and ldd at build time must find what we just built:
# refresh the symbolic links in $rpathdir for the library and its export
# vector.  Runs in a subshell so that the "cd" stays local.
# (A similar link for the libtool .la file is not made.)
(
  cd "$so_dir"
  for entry in "$soname" "$soname".exp
  do
    rm -f "$rpathdir"/"$entry"; ln -s "$PWD"/"$entry" "$rpathdir"
  done
)

# The rpath entries added for the build cannot be removed until the library
# is installed, or everything that uses it has been built; just tell the user
# which command to run then.
{
  echo "After installing" "$so_path" "(or after all builds), run:"
  echo "rpathrm" "$so_path" "$added_rpath"
}  1>&2

# Finalize the R_386_PC32 in the vector, and R_386_RELATIVE;
# and most other relocations to "internal" symbols.
# relvec reads the renamed library and writes the finished one to stdout;
# the copy is removed only when relvec succeeds, and this list's status is
# the last one the script produces.
mv "$so_path" "$so_path".tmp
relvec "$so_path".tmp "$inclrel" "$exclrel"  > "$so_path"  &&
  rm "$so_path".tmp
