Add more missing MachO constants and structs

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
This commit is contained in:
Jakub Konka 2020-08-30 09:21:40 +02:00
parent 427e2d689d
commit 04361dd461

View File

@ -81,6 +81,182 @@ pub const symtab_command = extern struct {
strsize: u32,
};
/// This is the second set of the symbolic information which is used to support
/// the data structures for the dynamic link editor.
///
/// The original set of symbolic information in the symtab_command which contains
/// the symbol and string tables must also be present when this load command is
/// present. When this load command is present the symbol table is organized
/// into three groups of symbols:
///   - local symbols (static and debugging symbols) - grouped by module
///   - defined external symbols - grouped by module (sorted by name if not lib)
///   - undefined external symbols (sorted by name if MH_BINDATLOAD is not set,
///     and in the order they were seen by the static linker if MH_BINDATLOAD
///     is set)
/// In this load command there are offsets and counts to each of the three groups
/// of symbols.
///
/// This load command contains the offsets and sizes of the following new
/// symbolic information tables:
///   - table of contents
///   - module table
///   - reference symbol table
///   - indirect symbol table
/// The first three tables above (the table of contents, module table and
/// reference symbol table) are only present if the file is a dynamically linked
/// shared library. For executable and object modules, which are files
/// containing only one module, the information that would be in these three
/// tables is determined as follows:
///   - table of contents - the defined external symbols are sorted by name
///   - module table - the file contains only one module so everything in the
///     file is part of the module.
///   - reference symbol table - is the defined and undefined external symbols
///
/// For dynamically linked shared library files this load command also contains
/// offsets and sizes to the pool of relocation entries for all sections
/// separated into two groups:
///   - external relocation entries
///   - local relocation entries
/// For executable and object modules the relocation entries continue to hang
/// off the section structures.
pub const dysymtab_command = extern struct {
/// LC_DYSYMTAB
cmd: u32,
/// sizeof(struct dysymtab_command)
cmdsize: u32,
// The symbols indicated by symoff and nsyms of the LC_SYMTAB load command
// are grouped into the following three groups:
//   - local symbols (further grouped by the module they are from)
//   - defined external symbols (further grouped by the module they are from)
//   - undefined symbols
//
// The local symbols are used only for debugging. The dynamic binding
// process may have to use them to indicate to the debugger the local
// symbols for a module that is being bound.
//
// The last two groups are used by the dynamic binding process to do the
// binding (indirectly through the module table and the reference symbol
// table when this is a dynamically linked shared library file).
/// index of local symbols
ilocalsym: u32,
/// number of local symbols
nlocalsym: u32,
/// index to externally defined symbols
iextdefsym: u32,
/// number of externally defined symbols
nextdefsym: u32,
/// index to undefined symbols
iundefsym: u32,
/// number of undefined symbols
nundefsym: u32,
// For the dynamic binding process to find which module a symbol is
// defined in, the table of contents is used (analogous to the ranlib
// structure in an archive) which maps defined external symbols to modules
// they are defined in. This exists only in a dynamically linked shared
// library file. For executable and object modules the defined external
// symbols are sorted by name and are used as the table of contents.
/// file offset to table of contents
tocoff: u32,
/// number of entries in table of contents
ntoc: u32,
// To support dynamic binding of "modules" (whole object files) the symbol
// table must reflect the modules that the file was created from. This is
// done by having a module table that has indexes and counts into the merged
// tables for each module. The module structure that these two entries
// refer to is described below. This exists only in a dynamically linked
// shared library file. For executable and object modules the file only
// contains one module so everything in the file belongs to the module.
/// file offset to module table
modtaboff: u32,
/// number of module table entries
nmodtab: u32,
// To support dynamic module binding the module structure for each module
// indicates the external references (defined and undefined) each module
// makes. For each module there is an offset and a count into the
// reference symbol table for the symbols that the module references.
// This exists only in a dynamically linked shared library file. For
// executable and object modules the defined external symbols and the
// undefined external symbols indicate the external references.
/// offset to referenced symbol table
extrefsymoff: u32,
/// number of referenced symbol table entries
nextrefsyms: u32,
// The sections that contain "symbol pointers" and "routine stubs" have
// indexes and (implied counts based on the size of the section and fixed
// size of the entry) into the "indirect symbol" table for each pointer
// and stub. For every section of these two types the index into the
// indirect symbol table is stored in the section header in the field
// reserved1. An indirect symbol table entry is simply a 32bit index into
// the symbol table to the symbol that the pointer or stub is referring to.
// The indirect symbol table is ordered to match the entries in the section.
/// file offset to the indirect symbol table
indirectsymoff: u32,
/// number of indirect symbol table entries
nindirectsyms: u32,
// To support relocating an individual module in a library file quickly the
// external relocation entries for each module in the library need to be
// accessed efficiently. Since the relocation entries can't be accessed
// through the section headers for a library file they are separated into
// groups of local and external entries further grouped by module. In this
// case the presence of this load command whose extreloff, nextrel,
// locreloff and nlocrel fields are non-zero indicates that the relocation
// entries of non-merged sections are not referenced through the section
// structures (and the reloff and nreloc fields in the section headers are
// set to zero).
//
// Since the relocation entries are not accessed through the section headers
// this requires the r_address field to be something other than a section
// offset to identify the item to be relocated. In this case r_address is
// set to the offset from the vmaddr of the first LC_SEGMENT command.
// For MH_SPLIT_SEGS images r_address is set to the offset from the
// vmaddr of the first read-write LC_SEGMENT command.
//
// The relocation entries are grouped by module and the module table
// entries have indexes and counts into them for the group of external
// relocation entries for that module.
//
// For sections that are merged across modules there must not be any
// remaining external relocation entries for them (for merged sections
// remaining relocation entries must be local).
/// offset to external relocation entries
extreloff: u32,
/// number of external relocation entries
nextrel: u32,
// All the local relocation entries are grouped together (they are not
// grouped by their module since they are only used if the object is moved
// from its statically link-edited address).
/// offset to local relocation entries
locreloff: u32,
/// number of local relocation entries
nlocrel: u32,
};
/// The linkedit_data_command contains the offsets and sizes of a blob
/// of data in the __LINKEDIT segment.
pub const linkedit_data_command = extern struct {
@ -97,6 +273,127 @@ pub const linkedit_data_command = extern struct {
datasize: u32,
};
/// The dyld_info_command contains the file offsets and sizes of
/// the new compressed form of the information dyld needs to
/// load the image. This information is used by dyld on Mac OS X
/// 10.6 and later. All information pointed to by this command
/// is encoded using byte streams, so no endian swapping is needed
/// to interpret it.
pub const dyld_info_command = extern struct {
/// LC_DYLD_INFO or LC_DYLD_INFO_ONLY
cmd: u32,
/// sizeof(struct dyld_info_command)
cmdsize: u32,
// Dyld rebases an image whenever dyld loads it at an address different
// from its preferred address. The rebase information is a stream
// of byte sized opcodes whose symbolic names start with REBASE_OPCODE_.
// Conceptually the rebase information is a table of tuples:
//   <seg-index, seg-offset, type>
// The opcodes are a compressed way to encode the table by only
// encoding when a column changes. In addition simple patterns
// like "every n'th offset for m times" can be encoded in a few
// bytes.
/// file offset to rebase info
rebase_off: u32,
/// size of rebase info
rebase_size: u32,
// Dyld binds an image during the loading process, if the image
// requires any pointers to be initialized to symbols in other images.
// The bind information is a stream of byte sized
// opcodes whose symbolic names start with BIND_OPCODE_.
// Conceptually the bind information is a table of tuples:
//   <seg-index, seg-offset, type, symbol-library-ordinal, symbol-name, addend>
// The opcodes are a compressed way to encode the table by only
// encoding when a column changes. In addition simple patterns
// like runs of pointers initialized to the same value can be
// encoded in a few bytes.
/// file offset to binding info
bind_off: u32,
/// size of binding info
bind_size: u32,
// Some C++ programs require dyld to unique symbols so that all
// images in the process use the same copy of some code/data.
// This step is done after binding. The content of the weak_bind
// info is an opcode stream like the bind_info. But it is sorted
// alphabetically by symbol name. This enables dyld to walk
// all images with weak binding information in order and look
// for collisions. If there are no collisions, dyld does
// no updating. That means that some fixups are also encoded
// in the bind_info. For instance, all calls to "operator new"
// are first bound to libstdc++.dylib using the information
// in bind_info. Then if some image overrides operator new
// that is detected when the weak_bind information is processed
// and the call to operator new is then rebound.
/// file offset to weak binding info
weak_bind_off: u32,
/// size of weak binding info
weak_bind_size: u32,
// Some uses of external symbols do not need to be bound immediately.
// Instead they can be lazily bound on first use. The lazy_bind
// area contains a stream of BIND opcodes to bind all lazy symbols.
// Normal use is that dyld ignores the lazy_bind section when
// loading an image. Instead the static linker arranged for the
// lazy pointer to initially point to a helper function which
// pushes the offset into the lazy_bind area for the symbol
// needing to be bound, then jumps to dyld which simply adds
// the offset to lazy_bind_off to get the information on what
// to bind.
/// file offset to lazy binding info
lazy_bind_off: u32,
/// size of lazy binding info
lazy_bind_size: u32,
// The symbols exported by a dylib are encoded in a trie. This
// is a compact representation that factors out common prefixes.
// It also reduces LINKEDIT pages in RAM because it encodes all
// information (name, address, flags) in one small, contiguous range.
// The export area is a stream of nodes. The first node sequentially
// is the start node for the trie.
//
// Nodes for a symbol start with a uleb128 that is the length of
// the exported symbol information for the string so far.
// If there is no exported symbol, the node starts with a zero byte.
// If there is exported info, it follows the length.
//
// First is a uleb128 containing flags. Normally, it is followed by
// a uleb128 encoded offset which is location of the content named
// by the symbol from the mach_header for the image. If the flags
// is EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is
// a uleb128 encoded library ordinal, then a zero terminated
// UTF8 string. If the string is zero length, then the symbol
// is re-exported from the specified dylib with the same name.
// If the flags is EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following
// the flags is two uleb128s: the stub offset and the resolver offset.
// The stub is used by non-lazy pointers. The resolver is used
// by lazy pointers and must be called to get the actual address to use.
//
// After the optional exported symbol information is a byte of
// how many edges (0-255) that this node has leaving it,
// followed by each edge.
// Each edge is a zero terminated UTF8 string of the additional chars
// in the symbol, followed by a uleb128 offset for the node that
// edge points to.
/// file offset to export info (the export trie described above)
export_off: u32,
/// size of export info
export_size: u32,
};
/// A program that uses a dynamic linker contains a dylinker_command to identify
/// the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker
/// contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER).
@ -681,6 +978,24 @@ pub const N_TYPE = 0x0e;
/// Bit that is set on external symbols.
pub const N_EXT = 0x01;
/// The symbol is undefined.
pub const N_UNDF = 0x00;
/// The symbol is absolute.
pub const N_ABS = 0x02;
/// The symbol is defined in the section whose number is stored in n_sect.
pub const N_SECT = 0x0e;
/// The symbol is undefined and the image is using a prebound value for it.
pub const N_PBUD = 0x0c;
/// The symbol is defined to be the same as another symbol; n_value holds an
/// index into the string table that names the other symbol.
pub const N_INDR = 0x0a;
/// Global symbol entry: name,,NO_SECT,type,0
pub const N_GSYM = 0x20;
@ -781,6 +1096,35 @@ pub const N_LENG = 0xfe;
/// The section is a debug section.
pub const S_ATTR_DEBUG = 0x02000000;
/// The section holds nothing but true machine instructions.
pub const S_ATTR_PURE_INSTRUCTIONS = 0x80000000;
/// The section holds coalesced symbols that must not appear in a ranlib
/// table of contents.
pub const S_ATTR_NO_TOC = 0x40000000;
/// Static symbols in this section may be stripped in files carrying the
/// MH_DYLDLINK flag.
pub const S_ATTR_STRIP_STATIC_SYMS = 0x20000000;
/// The section must not be dead-stripped.
pub const S_ATTR_NO_DEAD_STRIP = 0x10000000;
/// Blocks are considered live when they reference live blocks.
pub const S_ATTR_LIVE_SUPPORT = 0x08000000;
/// Used with i386 code stubs that dyld writes on.
pub const S_ATTR_SELF_MODIFYING_CODE = 0x04000000;
/// The section holds some machine instructions.
pub const S_ATTR_SOME_INSTRUCTIONS = 0x00000400;
/// The section has external relocation entries.
pub const S_ATTR_EXT_RELOC = 0x00000200;
/// The section has local relocation entries.
pub const S_ATTR_LOC_RELOC = 0x00000100;
/// Base integer alias; resolves to the C `int` type.
pub const integer_t = c_int;
/// CPU type identifier, stored as an `integer_t`.
pub const cpu_type_t = integer_t;
/// CPU subtype identifier, stored as an `integer_t`.
pub const cpu_subtype_t = integer_t;