Professional Documents
Culture Documents
//
// Detours Disassembler (disasm.cpp of detours.lib)
//
// Microsoft Research Detours Package, Version 4.0.1
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// #define DETOUR_DEBUG 1
#define DETOURS_INTERNAL
#include "detours.h"
#include <limits.h>
#undef ASSERT
#define ASSERT(x)
//////////////////////////////////////////////////////////////////////////////
//
// Special macros to handle the case when we are building disassembler for
// offline processing.
//
// When any *_OFFLINE_LIBRARY macro is set, the disassembler is built for a
// target that may differ from the host. Clear every host-derived target macro
// and then define exactly the one that matches the requested offline target,
// so that the architecture-specific sections later in this file are selected.
#if defined(DETOURS_X86_OFFLINE_LIBRARY) \
 || defined(DETOURS_X64_OFFLINE_LIBRARY) \
 || defined(DETOURS_ARM_OFFLINE_LIBRARY) \
 || defined(DETOURS_ARM64_OFFLINE_LIBRARY) \
 || defined(DETOURS_IA64_OFFLINE_LIBRARY)

#undef DETOURS_X64
#undef DETOURS_X86
#undef DETOURS_IA64
#undef DETOURS_ARM
#undef DETOURS_ARM64

#if defined(DETOURS_X86_OFFLINE_LIBRARY)
#define DETOURS_X86
#elif defined(DETOURS_X64_OFFLINE_LIBRARY)
#if !defined(DETOURS_64BIT)
// Fix this as/if bugs are discovered.
//#error X64 disassembler can only build for 64-bit.
#endif
#define DETOURS_X64
#elif defined(DETOURS_ARM_OFFLINE_LIBRARY)
// This branch was missing: an ARM offline build fell through to #error.
#define DETOURS_ARM
#elif defined(DETOURS_ARM64_OFFLINE_LIBRARY)
#define DETOURS_ARM64
#elif defined(DETOURS_IA64_OFFLINE_LIBRARY)
#define DETOURS_IA64
#else
#error
#endif

#endif
//////////////////////////////////////////////////////////////////////////////
//
// Function:
// DetourCopyInstruction(PVOID pDst,
// PVOID *ppDstPool,
// PVOID pSrc,
// PVOID *ppTarget,
// LONG *plExtra)
// Purpose:
// Copy a single instruction from pSrc to pDst.
//
// Arguments:
// pDst:
// Destination address for the instruction. May be NULL in which
// case DetourCopyInstruction is used to measure an instruction.
// If not NULL then the source instruction is copied to the
// destination instruction and any relative arguments are adjusted.
// ppDstPool:
// Destination address for the end of the constant pool. The
// constant pool works backwards toward pDst. All memory between
// pDst and *ppDstPool must be available for use by this function.
// ppDstPool may be NULL if pDst is NULL.
// pSrc:
// Source address of the instruction.
// ppTarget:
// Out parameter for any target instruction address pointed to by
// the instruction. For example, a branch or a jump instruction has
// a target, but a load or store instruction doesn't. A target is
// another instruction that may be executed as a result of this
// instruction. ppTarget may be NULL.
// plExtra:
// Out parameter for the number of extra bytes needed by the
// instruction to reach the target. For example, lExtra = 3 if the
// instruction had an 8-bit relative offset, but needs a 32-bit
// relative offset.
//
// Returns:
// Returns the address of the next instruction (the one following this
// instruction in the source). By subtracting pSrc from the return value,
// the caller can determine the size of the instruction copied.
//
// Comments:
// By following the pTarget, the caller can follow alternate
// instruction streams. However, it is not always possible to determine
// the target based on static analysis. For example, the destination of
// a jump relative to a register cannot be determined from just the
// instruction stream. The output value, pTarget, can have any of the
// following outputs:
// DETOUR_INSTRUCTION_TARGET_NONE:
// The instruction has no targets.
// DETOUR_INSTRUCTION_TARGET_DYNAMIC:
// The instruction has a non-deterministic (dynamic) target.
// (i.e. the jump is to an address held in a register.)
// Address: The instruction has the specified target.
//
// When copying instructions, DetourCopyInstruction ensures that any
// targets remain constant. It does so by adjusting any IP relative
// offsets.
//
#pragma data_seg(".detourd")
#pragma const_seg(".detourc")
class CDetourDis
{
public:
CDetourDis(_Out_opt_ PBYTE *ppbTarget,
_Out_opt_ LONG *plExtra);
public:
struct COPYENTRY;
typedef const COPYENTRY * REFCOPYENTRY;
// nFlagBits flags.
enum {
DYNAMIC = 0x1u,
ADDRESS = 0x2u,
NOENLARGE = 0x4u,
RAX = 0x8u,
};
// ModR/M Flags
enum {
SIB = 0x10u,
RIP = 0x20u,
NOTSIB = 0x0fu,
};
struct COPYENTRY
{
// Many of these fields are often ignored. See ENTRY_DataIgnored.
ULONG nFixedSize : 4; // Fixed size of opcode
ULONG nFixedSize16 : 4; // Fixed size when 16 bit operand
ULONG nModOffset : 4; // Offset to mod/rm byte (0=none)
ULONG nRelOffset : 4; // Offset to relative target.
ULONG nFlagBits : 4; // Flags for DYNAMIC, etc.
COPYFUNC pfCopy; // Function pointer.
};
protected:
// These macros define common uses of nFixedSize, nFixedSize16, nModOffset,
nRelOffset, nFlagBits, pfCopy.
#define ENTRY_DataIgnored 0, 0, 0, 0, 0,
#define ENTRY_CopyBytes1 { 1, 1, 0, 0, 0, &CDetourDis::CopyBytes }
#ifdef DETOURS_X64
#define ENTRY_CopyBytes1Address { 9, 5, 0, 0, ADDRESS, &CDetourDis::CopyBytes }
#else
#define ENTRY_CopyBytes1Address { 5, 3, 0, 0, ADDRESS, &CDetourDis::CopyBytes }
#endif
#define ENTRY_CopyBytes1Dynamic { 1, 1, 0, 0, DYNAMIC, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes2 { 2, 2, 0, 0, 0, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes2Jump { ENTRY_DataIgnored
&CDetourDis::CopyBytesJump }
#define ENTRY_CopyBytes2CantJump { 2, 2, 0, 1, NOENLARGE, &CDetourDis::CopyBytes
}
#define ENTRY_CopyBytes2Dynamic { 2, 2, 0, 0, DYNAMIC, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes3 { 3, 3, 0, 0, 0, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes3Dynamic { 3, 3, 0, 0, DYNAMIC, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes3Or5 { 5, 3, 0, 0, 0, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes3Or5Dynamic { 5, 3, 0, 0, DYNAMIC,
&CDetourDis::CopyBytes }// x86 only
#ifdef DETOURS_X64
#define ENTRY_CopyBytes3Or5Rax { 5, 3, 0, 0, RAX, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes3Or5Target { 5, 5, 0, 1, 0, &CDetourDis::CopyBytes }
#else
#define ENTRY_CopyBytes3Or5Rax { 5, 3, 0, 0, 0, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes3Or5Target { 5, 3, 0, 1, 0, &CDetourDis::CopyBytes }
#endif
#define ENTRY_CopyBytes4 { 4, 4, 0, 0, 0, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes5 { 5, 5, 0, 0, 0, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes5Or7Dynamic { 7, 5, 0, 0, DYNAMIC, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes7 { 7, 7, 0, 0, 0, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes2Mod { 2, 2, 1, 0, 0, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes2ModDynamic { 2, 2, 1, 0, DYNAMIC, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes2Mod1 { 3, 3, 1, 0, 0, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes2ModOperand { 6, 4, 1, 0, 0, &CDetourDis::CopyBytes }
#define ENTRY_CopyBytes3Mod { 3, 3, 2, 0, 0, &CDetourDis::CopyBytes } //
SSE3 0F 38 opcode modrm
#define ENTRY_CopyBytes3Mod1 { 4, 4, 2, 0, 0, &CDetourDis::CopyBytes } //
SSE3 0F 3A opcode modrm .. imm8
#define ENTRY_CopyBytesPrefix { ENTRY_DataIgnored
&CDetourDis::CopyBytesPrefix }
#define ENTRY_CopyBytesSegment { ENTRY_DataIgnored
&CDetourDis::CopyBytesSegment }
#define ENTRY_CopyBytesRax { ENTRY_DataIgnored &CDetourDis::CopyBytesRax }
#define ENTRY_CopyF2 { ENTRY_DataIgnored &CDetourDis::CopyF2 }
#define ENTRY_CopyF3 { ENTRY_DataIgnored &CDetourDis::CopyF3 } //
32bit x86 only
#define ENTRY_Copy0F { ENTRY_DataIgnored &CDetourDis::Copy0F }
#define ENTRY_Copy0F78 { ENTRY_DataIgnored &CDetourDis::Copy0F78 }
#define ENTRY_Copy0F00 { ENTRY_DataIgnored &CDetourDis::Copy0F00 } //
32bit x86 only
#define ENTRY_Copy0FB8 { ENTRY_DataIgnored &CDetourDis::Copy0FB8 } //
32bit x86 only
#define ENTRY_Copy66 { ENTRY_DataIgnored &CDetourDis::Copy66 }
#define ENTRY_Copy67 { ENTRY_DataIgnored &CDetourDis::Copy67 }
#define ENTRY_CopyF6 { ENTRY_DataIgnored &CDetourDis::CopyF6 }
#define ENTRY_CopyF7 { ENTRY_DataIgnored &CDetourDis::CopyF7 }
#define ENTRY_CopyFF { ENTRY_DataIgnored &CDetourDis::CopyFF }
#define ENTRY_CopyVex2 { ENTRY_DataIgnored &CDetourDis::CopyVex2 }
#define ENTRY_CopyVex3 { ENTRY_DataIgnored &CDetourDis::CopyVex3 }
#define ENTRY_CopyEvex { ENTRY_DataIgnored &CDetourDis::CopyEvex } //
62, 3 byte payload, then normal with implied prefixes like vex
#define ENTRY_CopyXop { ENTRY_DataIgnored &CDetourDis::CopyXop } //
0x8F ... POP /0 or AMD XOP
#define ENTRY_CopyBytesXop { 5, 5, 4, 0, 0, &CDetourDis::CopyBytes } //
0x8F xop1 xop2 opcode modrm
#define ENTRY_CopyBytesXop1 { 6, 6, 4, 0, 0, &CDetourDis::CopyBytes } //
0x8F xop1 xop2 opcode modrm ... imm8
#define ENTRY_CopyBytesXop4 { 9, 9, 4, 0, 0, &CDetourDis::CopyBytes } //
0x8F xop1 xop2 opcode modrm ... imm32
#define ENTRY_Invalid { ENTRY_DataIgnored &CDetourDis::Invalid }
protected:
PBYTE Copy0F(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
PBYTE Copy0F00(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // x86 only
sldt/0 str/1 lldt/2 ltr/3 err/4 verw/5 jmpe/6/dynamic invalid/7
PBYTE Copy0F78(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // vmread,
66/extrq/ib/ib, F2/insertq/ib/ib
PBYTE Copy0FB8(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // jmpe or
F3/popcnt
PBYTE Copy66(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
PBYTE Copy67(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
PBYTE CopyF2(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
PBYTE CopyF3(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); // x86 only
PBYTE CopyF6(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
PBYTE CopyF7(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
PBYTE CopyFF(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
PBYTE CopyVex2(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
PBYTE CopyVex3(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
PBYTE CopyVexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc);
PBYTE CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p);
PBYTE CopyEvex(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
PBYTE CopyXop(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc);
protected:
static const COPYENTRY s_rceCopyTable[];
static const COPYENTRY s_rceCopyTable0F[];
static const BYTE s_rbModRm[256];
static PBYTE s_pbModuleBeg;
static PBYTE s_pbModuleEnd;
static BOOL s_fLimitReferencesToModule;
protected:
BOOL m_bOperandOverride;
BOOL m_bAddressOverride;
BOOL m_bRaxOverride; // AMD64 only
BOOL m_bVex;
BOOL m_bEvex;
BOOL m_bF2;
BOOL m_bF3; // x86 only
BYTE m_nSegmentOverride;
PBYTE * m_ppbTarget;
LONG * m_plExtra;
LONG m_lScratchExtra;
PBYTE m_pbScratchTarget;
BYTE m_rbScratchDst[64]; // matches or exceeds rbCode
};
*m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_NONE;
*m_plExtra = 0;
}
// Figure out how big the instruction is, do the appropriate copy,
// and figure out what the target of the instruction is if any.
//
REFCOPYENTRY pEntry = &s_rceCopyTable[pbSrc[0]];
return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc);
}
if (m_bVex || m_bEvex)
{
ASSERT(pEntry->nFlagBits == 0);
ASSERT(pEntry->nFixedSize == pEntry->nFixedSize16);
}
if (nRelOffset) {
*m_ppbTarget = AdjustTarget(pbDst, pbSrc, nBytes, nRelOffset, cbTarget);
#ifdef DETOURS_X64
if (pEntry->nRelOffset == 0) {
// This is a data target, not a code target, so we shouldn't return it.
*m_ppbTarget = NULL;
}
#endif
}
if (nFlagBits & NOENLARGE) {
*m_plExtra = -*m_plExtra;
}
if (nFlagBits & DYNAMIC) {
*m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
}
return pbSrc + nBytes;
}
if (pbSrc[0] == 0xeb) {
pbDst[0] = 0xe9;
pvDstAddr = &pbDst[1];
nNewOffset = nOldOffset - ((pbDst - pbSrc) + 3);
*(UNALIGNED LONG*&)pvDstAddr = (LONG)nNewOffset;
*m_plExtra = 3;
return pbSrc + 2;
}
pbDst[0] = 0x0f;
pbDst[1] = 0x80 | (pbSrc[0] & 0xf);
pvDstAddr = &pbDst[2];
nNewOffset = nOldOffset - ((pbDst - pbSrc) + 4);
*(UNALIGNED LONG*&)pvDstAddr = (LONG)nNewOffset;
*m_plExtra = 4;
return pbSrc + 2;
}
switch (cbTargetSize) {
case 1:
*(CHAR*&)pvTargetAddr = (CHAR)nNewOffset;
if (nNewOffset < SCHAR_MIN || nNewOffset > SCHAR_MAX) {
*m_plExtra = sizeof(ULONG) - 1;
}
break;
case 2:
*(UNALIGNED SHORT*&)pvTargetAddr = (SHORT)nNewOffset;
if (nNewOffset < SHRT_MIN || nNewOffset > SHRT_MAX) {
*m_plExtra = sizeof(ULONG) - 2;
}
break;
case 4:
*(UNALIGNED LONG*&)pvTargetAddr = (LONG)nNewOffset;
if (nNewOffset < LONG_MIN || nNewOffset > LONG_MAX) {
*m_plExtra = sizeof(ULONG) - 4;
}
break;
#if defined(DETOURS_X64)
case 8:
*(UNALIGNED LONGLONG*&)pvTargetAddr = nNewOffset;
break;
#endif
}
#ifdef DETOURS_X64
// When we are only computing size, source and dest can be
// far apart, distance not encodable in 32bits. Ok.
// At least still check the lower 32bits.
// For insertq and presumably despite documentation extrq, mode must be 11, not checked.
// insertq/extrq/78 are followed by two immediate bytes, and given mode == 11, mod/rm byte is always one byte,
// and the 0x78 makes 4 bytes (not counting the 66/F2/F which are accounted for elsewhere)
// TEST BYTE /0
if (0x00 == (0x38 & pbSrc[1])) { // reg(bits 543) of ModR/M == 0
static const COPYENTRY ce = /* f6 */ ENTRY_CopyBytes2Mod1;
return (this->*ce.pfCopy)(&ce, pbDst, pbSrc);
}
// DIV /6
// IDIV /7
// IMUL /5
// MUL /4
// NEG /3
// NOT /2
// TEST WORD /0
if (0x00 == (0x38 & pbSrc[1])) { // reg(bits 543) of ModR/M == 0
static const COPYENTRY ce = /* f7 */ ENTRY_CopyBytes2ModOperand;
return (this->*ce.pfCopy)(&ce, pbDst, pbSrc);
}
// DIV /6
// IDIV /7
// IMUL /5
// MUL /4
// NEG /3
// NOT /2
static const COPYENTRY ce = /* f7 */ ENTRY_CopyBytes2Mod;
return (this->*ce.pfCopy)(&ce, pbDst, pbSrc);
}
*m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
}
else {
// This can access violate on random bytes. Use DetourSetCodeModule.
*m_ppbTarget = *ppbTarget;
}
}
else {
*m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
}
}
else if (0x10 == (0x30 & b1) || // CALL /2 or /3 --> reg(bits 543) of ModR/M
== 010 or 011
0x20 == (0x30 & b1)) { // JMP /4 or /5 --> reg(bits 543) of ModR/M ==
100 or 101
*m_ppbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
}
return pbOut;
}
switch (p & 3) {
case 0: break;
case 1: m_bOperandOverride = TRUE; break;
case 2: m_bF3 = TRUE; break;
case 3: m_bF2 = TRUE; break;
}
REFCOPYENTRY pEntry;
switch (m) {
default: return Invalid(&ceInvalid, pbDst, pbSrc);
case 1: pEntry = &s_rceCopyTable0F[pbSrc[0]];
return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc);
case 2: return CopyBytes(&ceF38, pbDst, pbSrc);
case 3: return CopyBytes(&ceF3A, pbDst, pbSrc);
}
}
#ifdef DETOURS_X86
const static COPYENTRY ceBound = /* 62 */ ENTRY_CopyBytes2Mod;
if ((p0 & 0xC0) != 0xC0) {
return CopyBytes(&ceBound, pbDst, pbSrc);
}
#endif
m_bEvex = TRUE;
#ifdef DETOURS_X64
m_bRaxOverride |= !!(p1 & 0x80); // w
#endif
//////////////////////////////////////////////////////////////////////////////
//
// Defaults for the code-module window consulted by the x86/x64 disassembler:
// reference limiting is off, and the window runs from NULL to the highest
// representable address (all address bits set).
BOOL CDetourDis::s_fLimitReferencesToModule = FALSE;
PBYTE CDetourDis::s_pbModuleBeg = NULL;
PBYTE CDetourDis::s_pbModuleEnd = reinterpret_cast<PBYTE>(~static_cast<ULONG_PTR>(0));
s_pbModuleBeg = pbBeg;
s_pbModuleEnd = pbEnd;
s_fLimitReferencesToModule = fLimitReferencesToModule;
return TRUE;
}
// Verifies at compile time that both opcode dispatch tables contain exactly
// 256 entries, i.e. one per possible value of the byte used to index them.
//
// Returns:
//   Always TRUE; a wrong table size fails the build instead.
//
BOOL CDetourDis::SanityCheckSystem()
{
    C_ASSERT(256 == ARRAYSIZE(CDetourDis::s_rceCopyTable0F));
    C_ASSERT(256 == ARRAYSIZE(CDetourDis::s_rceCopyTable));

    return TRUE;
}
#endif // defined(DETOURS_X64) || defined(DETOURS_X86)
// 00
// f.e. d.c. b.a. 9.8. 7.6. 5.4. 3.2. 1.0.
// 0000 0000 0000 0000 0000 0000 0000 001f : Template [4..0]
// 0000 0000 0000 0000 0000 03ff ffff ffe0 : Zero [ 41.. 5]
// 0000 0000 0000 0000 0000 3c00 0000 0000 : Zero [ 45.. 42]
// 0000 0000 0007 ffff ffff c000 0000 0000 : One [ 82.. 46]
// 0000 0000 0078 0000 0000 0000 0000 0000 : One [ 86.. 83]
// 0fff ffff ff80 0000 0000 0000 0000 0000 : Two [123.. 87]
// f000 0000 0000 0000 0000 0000 0000 0000 : Two [127..124]
// Returns the bundle's template field: bits [4..0] of the first data byte.
BYTE DETOUR_IA64_BUNDLE::GetTemplate() const
{
    const BYTE bFirst = data[0];

    // Mask off everything above the five template bits.
    return (bFirst & 0x1f);
}
switch (GetUnit(slot)) {
case F_UNIT:
// F14 fchkf
if (opcode == 0 && GetX(instruction) == 0 && GetX6(instruction) == 8) {
goto imm20a;
}
return false;
case M_UNIT:
// M20 x3 == 1 integer chk.s.m
// M21 x3 == 3 floating point chk.s
if (opcode == 1) {
UINT64 const x3 = GetX3(instruction);
if (x3 == 1 || x3 == 3) {
goto imm13_7;
}
}
UINT64 imm;
UINT64 new_instruction;
imm13_7:
imm = SignExtend((GetSignBit(instruction) << 20) | (GetImm13c(instruction) <<
7) | GetImm7a(instruction), 21) << 4;
new_instruction = SetSignBit(SetImm13c(SetImm7a(instruction, (extra - dest) >>
4), (extra - dest) >> 11), extra < dest);
goto set_brl;
imm20a:
imm = SignExtend((GetSignBit(instruction) << 20) | GetImm20a(instruction), 21)
<< 4;
new_instruction = SetSignBit(SetImm20a(instruction, (extra - dest) >> 4), extra
< dest);
goto set_brl;
imm20b:
imm = SignExtend((GetSignBit(instruction) << 20) | GetImm20b(instruction), 21)
<< 4;
new_instruction = SetSignBit(SetImm20b(instruction, (extra - dest) >> 4), extra
< dest);
goto set_brl;
set_brl:
if (pBundleExtra != NULL) {
pDst->SetInstruction(slot, new_instruction);
pBundleExtra->SetBrl((size_t)this + imm);
}
return true;
}
// Overwrites both 64-bit halves of the bundle with a fixed bit pattern.
// NOTE(review): judging by the name this is the encoding of a brl (branch
// long) bundle with no target filled in - confirm against the IA64 manual.
VOID DETOUR_IA64_BUNDLE::SetBrl()
{
    // The two stores are independent; the upper half is written first here.
    wide[1] = 0xc000000800000000;

    wide[0] = 0x0000000100000005; // few
    //wide[0] = 0x0000000180000005; // many
}
return (INT64)raw;
}
wide[0] = (0x0000000100000005 |
// 0xffffc00000000000
((raw & 0x000000ffffc00000) << 24)
);
wide[1] = (
0x6000000020000000 |
// 0x0000070000000000
((raw & 0x0000000000000070) << 36) |
// 0x07fc000000000000
((raw & 0x000000000000ff80) << 43) |
// 0x0003e00000000000
((raw & 0x00000000001f0000) << 29) |
// 0x0000100000000000
((raw & 0x0000000000200000) << 23) |
// 0x00000000007fffff
((raw & 0x7fffff0000000000) >> 40) |
// 0x0800000000000000
((raw & 0x8000000000000000) >> 4)
);
}
// Relocate if necessary.
return nExtraBytes;
}
// Convenience wrapper: forwards to SetNop for slot 0 and returns its result.
BOOL DETOUR_IA64_BUNDLE::SetNop0()
{
    BOOL fResult = SetNop(0);
    return fResult;
}
// Convenience wrapper: forwards to SetNop for slot 1 and returns its result.
BOOL DETOUR_IA64_BUNDLE::SetNop1()
{
    BOOL fResult = SetNop(1);
    return fResult;
}
// Convenience wrapper: forwards to SetNop for slot 2 and returns its result.
BOOL DETOUR_IA64_BUNDLE::SetNop2()
{
    BOOL fResult = SetNop(2);
    return fResult;
}
// Sets bit 0 of the first data byte, which is the lowest bit of the 5-bit
// template field; all other bits are left untouched.
// NOTE(review): presumably this selects the template variant that ends with
// a stop - confirm against the IA64 template table.
VOID DETOUR_IA64_BUNDLE::SetStop()
{
    const BYTE bStopBit = 0x01;
    data[0] |= bStopBit;
}
#endif // DETOURS_IA64
if (ppTarget != NULL) {
if (pbSrc->IsBrl()) {
*ppTarget = (PVOID)pbSrc->GetBrlTarget();
}
else {
*ppTarget = DETOUR_INSTRUCTION_TARGET_NONE;
}
}
*plExtra = (LONG)pbSrc->Copy(pbDst, ppDstPool ?
((DETOUR_IA64_BUNDLE*)*ppDstPool) - 1 : (DETOUR_IA64_BUNDLE*)NULL);
return pbSrc + 1;
}
#endif // DETOURS_IA64
#ifdef DETOURS_ARM
class CDetourDis
{
public:
CDetourDis();
public:
// Signature of the per-opcode copy handlers. The parameter names mirror the
// handlers declared in this class, which take the source pointer first and
// the destination pointer second (the previous names had them swapped).
typedef BYTE (CDetourDis::* COPYFUNC)(PBYTE pSource, PBYTE pDest);

// One row of the opcode dispatch table.
struct COPYENTRY {
    USHORT nOpcode;     // Opcode prefix this row handles (table rows are commented as 5-bit values).
    COPYFUNC pfCopy;    // Handler for that opcode; NULL in the table's terminating row.
};
// Bit layout of a 16-bit compare-and-branch (CBZ/CBNZ) instruction:
// 3 + 5 + 1 + 1 + 6 = 16 bits.
// NOTE(review): field positions assume bit-fields are allocated from the
// least significant bit upward (as MSVC does) - confirm for other compilers.
struct Branch5
{
DWORD Register : 3;
DWORD Imm5 : 5;
DWORD Padding : 1;
DWORD I : 1;
DWORD OpCode : 6;
};
// 32-bit view of the branch offset carried by a Branch5: Imm5 and I are
// copied across unchanged, and the one-bit Padding below Imm5 keeps the low
// bit of the offset zero (1 + 5 + 1 + 25 = 32 bits).
struct Branch5Target
{
DWORD Padding : 1;
DWORD Imm5 : 5;
DWORD I : 1;
DWORD Padding2 : 25;
};
// Bit layout of a 16-bit branch with an 8-bit immediate and a 4-bit
// condition code: 8 + 4 + 4 = 16 bits.
// NOTE(review): presumably the Thumb conditional branch (B<cond>) - confirm.
struct Branch8
{
DWORD Imm8 : 8;
DWORD Condition : 4;
DWORD OpCode : 4;
};
// 32-bit view of the offset carried by a Branch8; the one-bit Padding below
// Imm8 keeps the low bit of the offset zero (1 + 8 + 23 = 32 bits).
struct Branch8Target
{
DWORD Padding : 1;
DWORD Imm8 : 8;
DWORD Padding2 : 23;
};
// Bit layout of the 16-bit unconditional branch: 11-bit immediate plus a
// 5-bit opcode (this file builds one with OpCode 0x1C).
struct Branch11
{
DWORD Imm11 : 11;
DWORD OpCode : 5;
};
// 32-bit view of the offset carried by a Branch11; the one-bit Padding below
// Imm11 keeps the low bit of the offset zero (1 + 11 + 20 = 32 bits).
struct Branch11Target
{
DWORD Padding : 1;
DWORD Imm11 : 11;
DWORD Padding2 : 20;
};
// Bit layout of a 32-bit branch that carries a condition code and a split
// immediate (Imm11, Imm6, J1, J2, Sign): 11+1+1+1+2+6+4+1+5 = 32 bits.
// NOTE(review): presumably the Thumb-2 conditional B.W encoding - confirm.
struct Branch20
{
DWORD Imm11 : 11;
DWORD J2 : 1;
DWORD IT : 1;
DWORD J1 : 1;
DWORD Other : 2;
DWORD Imm6 : 6;
DWORD Condition : 4;
DWORD Sign : 1;
DWORD OpCode : 5;
};
// 32-bit view of the offset carried by a Branch20 (1+11+6+1+1+1+11 = 32).
// Padding2 is a signed field so that assigning -1 fills it with ones when the
// offset is sign-extended.
struct Branch20Target
{
DWORD Padding : 1;
DWORD Imm11 : 11;
DWORD Imm6 : 6;
DWORD J1 : 1;
DWORD J2 : 1;
DWORD Sign : 1;
INT32 Padding2 : 11;
};
// Bit layout of a 32-bit branch with a Link bit and a split immediate
// (Imm11, Imm10, J1, J2, Sign): 11+1+1+1+1+1+10+1+5 = 32 bits.
// NOTE(review): presumably the Thumb-2 B.W / BL / BLX encodings - confirm.
struct Branch24
{
DWORD Imm11 : 11;
DWORD J2 : 1;
DWORD InstructionSet : 1;
DWORD J1 : 1;
DWORD Link : 1;
DWORD Branch : 1;
DWORD Imm10 : 10;
DWORD Sign : 1;
DWORD OpCode : 5;
};
// 32-bit view of the offset carried by a Branch24 (1+11+10+1+1+1+7 = 32).
// DecodeBranch24 derives I1/I2 from J1/J2 and Sign; Padding2 is a signed
// field so that assigning -1 fills it with ones when the offset is
// sign-extended.
struct Branch24Target
{
DWORD Padding : 1;
DWORD Imm11 : 11;
DWORD Imm10 : 10;
DWORD I2 : 1;
DWORD I1 : 1;
DWORD Sign : 1;
INT32 Padding2 : 7;
};
// Bit layout of a 16-bit literal load: 8-bit immediate, 3-bit register and a
// 5-bit opcode (8 + 3 + 5 = 16 bits).
struct LiteralLoad8
{
DWORD Imm8 : 8;
DWORD Register : 3;
DWORD OpCode : 5;
};
// 32-bit view of the offset carried by a LiteralLoad8; the two Padding bits
// below Imm8 keep the low two bits of the offset zero (2 + 8 + 22 = 32 bits).
struct LiteralLoad8Target
{
DWORD Padding : 2;
DWORD Imm8 : 8;
DWORD Padding2 : 22;
};
// Bit layout of a 32-bit literal load with a 12-bit immediate and an Add bit:
// 12 + 4 + 7 + 1 + 8 = 32 bits.
// NOTE(review): Add presumably selects whether the immediate is added to or
// subtracted from the base - confirm against the decoder.
struct LiteralLoad12
{
DWORD Imm12 : 12;
DWORD Register : 4;
DWORD OpCodeSuffix : 7;
DWORD Add : 1;
DWORD OpCodePrefix : 8;
};
// 32-bit view of the offset carried by a LiteralLoad12; Imm12 sits in the
// lowest bits with no padding below it (12 + 20 = 32 bits).
struct LiteralLoad12Target
{
DWORD Imm12 : 12;
DWORD Padding : 20;
};
// Bit layout of a 32-bit register load with a 12-bit immediate offset:
// 12 + 4 + 4 + 12 = 32 bits.
struct ImmediateRegisterLoad32
{
DWORD Imm12 : 12;
DWORD DestinationRegister : 4;
DWORD SourceRegister: 4;
DWORD OpCode : 12;
};
// Bit layout of a 16-bit register load using 3-bit register numbers:
// 3 + 3 + 10 = 16 bits.
struct ImmediateRegisterLoad16
{
DWORD DestinationRegister : 3;
DWORD SourceRegister: 3;
DWORD OpCode : 10;
};
// Bit layout of a table branch (TBB/TBH): 4 + 1 + 11 + 4 + 12 = 32 bits.
// HalfWord distinguishes TBH from TBB; BaseRegister is compared against PC to
// decide whether the instruction can be copied verbatim.
struct TableBranch
{
DWORD IndexRegister : 4;
DWORD HalfWord : 1;
DWORD OpCodeSuffix : 11;
DWORD BaseRegister : 4;
DWORD OpCodePrefix : 12;
};
// A 5-bit shift amount split into a 2-bit low part and a 3-bit high part.
// NOTE(review): presumably used to fill the Imm2/Imm3 fields of Add32 and
// LogicalShiftLeft32 - confirm against the Emit* helpers.
struct Shift
{
DWORD Imm2 : 2;
DWORD Imm3 : 3;
};
// Bit layout of a 32-bit register ADD with a shifted second operand:
// 4 + 2 + 2 + 4 + 3 + 1 + 4 + 1 + 11 = 32 bits. The shift amount is split
// across Imm2 and Imm3.
struct Add32
{
DWORD SecondOperandRegister : 4;
DWORD Type : 2;
DWORD Imm2 : 2;
DWORD DestinationRegister : 4;
DWORD Imm3 : 3;
DWORD Padding : 1;
DWORD FirstOperandRegister : 4;
DWORD SetFlags : 1;
DWORD OpCode : 11;
};
// Bit layout of a 32-bit logical-shift-left by immediate:
// 4 + 2 + 2 + 4 + 3 + 5 + 1 + 11 = 32 bits. The shift amount is split across
// Imm2 and Imm3.
struct LogicalShiftLeft32
{
DWORD SourceRegister : 4;
DWORD Padding : 2;
DWORD Imm2 : 2;
DWORD DestinationRegister : 4;
DWORD Imm3 : 3;
DWORD Padding2 : 5;
DWORD SetFlags : 1;
DWORD OpCode : 11;
};
// Bit layout of a 32-bit store with a 12-bit immediate offset:
// 12 + 4 + 4 + 12 = 32 bits.
struct StoreImmediate12
{
DWORD Imm12 : 12;
DWORD SourceRegister : 4;
DWORD BaseRegister : 4;
DWORD OpCode : 12;
};
protected:
// Per-opcode copy handlers. Each takes the source instruction first and the
// destination buffer second.
// NOTE(review): the visible handler bodies return sizeof(USHORT) or
// sizeof(DWORD), so the BYTE result appears to be the size in bytes of the
// source instruction - confirm for the handlers not shown here.
BYTE PureCopy16(BYTE* pSource, BYTE* pDest);
BYTE PureCopy32(BYTE* pSource, BYTE* pDest);
BYTE CopyMiscellaneous16(BYTE* pSource, BYTE* pDest);
BYTE CopyConditionalBranchOrOther16(BYTE* pSource, BYTE* pDest);
BYTE CopyUnConditionalBranch16(BYTE* pSource, BYTE* pDest);
BYTE CopyLiteralLoad16(BYTE* pSource, BYTE* pDest);
BYTE CopyBranchExchangeOrDataProcessing16(BYTE* pSource, BYTE* pDest);
BYTE CopyBranch24(BYTE* pSource, BYTE* pDest);
BYTE CopyBranchOrMiscellaneous32(BYTE* pSource, BYTE* pDest);
BYTE CopyLiteralLoad32(BYTE* pSource, BYTE* pDest);
BYTE CopyLoadAndStoreSingle(BYTE* pSource, BYTE* pDest);
BYTE CopyLoadAndStoreMultipleAndSRS(BYTE* pSource, BYTE* pDest);
BYTE CopyTableBranch(BYTE* pSource, BYTE* pDest);
// Entry point for the 32-bit opcode rows (0x1d..0x1f) of the dispatch table.
BYTE BeginCopy32(BYTE* pSource, BYTE* pDest);
protected:
// Reads a 32-bit instruction stored as two consecutive 16-bit halfwords and
// returns it with the first halfword in the upper 16 bits.
//
// Arguments:
//   pSource: Address of the first halfword of the instruction.
//
// Returns:
//   (first halfword << 16) | second halfword.
//
ULONG GetLongInstruction(BYTE* pSource)
{
    PUSHORT pHalfWords = (PUSHORT)pSource;

    // Widen to ULONG before shifting: a USHORT is promoted to (signed) int,
    // and shifting a halfword whose top bit is set left by 16 would shift
    // into the sign bit.
    const ULONG first = pHalfWords[0];
    const ULONG second = pHalfWords[1];
    return (first << 16) | second;
}
protected:
PBYTE m_pbTarget;
PBYTE m_pbPool;
LONG m_lExtra;
Branch5Target target;
ZeroMemory(&target, sizeof(target));
target.Imm5 = branch.Imm5;
target.I = branch.I;
branch.Imm5 = target.Imm5;
branch.I = target.I;
return (USHORT&)branch;
}
Branch8Target target;
ZeroMemory(&target, sizeof(target));
target.Imm8 = branch.Imm8;
branch.Imm8 = target.Imm8;
return (USHORT&)branch;
}
Branch11Target target;
ZeroMemory(&target, sizeof(target));
target.Imm11 = branch.Imm11;
branch.Imm11 = target.Imm11;
return (USHORT&)branch;
}
*pDest++ = (USHORT&)branch11;
return sizeof(USHORT);
}
Branch20Target target;
ZeroMemory(&target, sizeof(target));
target.Imm11 = branch.Imm11;
target.Imm6 = branch.Imm6;
target.Sign = branch.Sign;
target.J1 = branch.J1;
target.J2 = branch.J2;
// Sign extend
if (target.Sign) {
target.Padding2 = -1;
}
return (LONG&)target;
}
branch.Imm11 = target.Imm11;
branch.Imm6 = target.Imm6;
branch.Sign = target.Sign;
branch.J1 = target.J1;
branch.J2 = target.J2;
return (ULONG&)branch;
}
// Decodes the branch offset carried by a 32-bit opcode in Branch24 format.
//
// Arguments:
//   opcode: The 32-bit instruction word, reinterpreted as a Branch24.
//   fLink:  Receives the instruction's Link bit.
//
// Returns:
//   The Branch24Target bit pattern reinterpreted as a LONG. When the Sign bit
//   is set, the Padding2 bits above it are filled with ones (sign extension).
//
LONG CDetourDis::DecodeBranch24(ULONG opcode, BOOL& fLink)
{
    Branch24& encoded = (Branch24&)(opcode);

    // The link flag does not take part in the offset computation.
    fLink = encoded.Link;

    Branch24Target decoded;
    ZeroMemory(&decoded, sizeof(decoded));

    decoded.Sign = encoded.Sign;
    decoded.Imm10 = encoded.Imm10;
    decoded.Imm11 = encoded.Imm11;

    // I1/I2 are the complement of (J1/J2 XOR Sign); the one-bit destination
    // fields keep only the low bit of the complemented value.
    decoded.I1 = ~(encoded.J1 ^ decoded.Sign);
    decoded.I2 = ~(encoded.J2 ^ decoded.Sign);

    // Sign extend
    if (decoded.Sign != 0) {
        decoded.Padding2 = -1;
    }

    return (LONG&)decoded;
}
branch.Imm11 = target.Imm11;
branch.Imm10 = target.Imm10;
branch.Link = fLink;
branch.Sign = target.Sign;
branch.J1 = ~(target.I1 ^ branch.Sign);
branch.J2 = ~(target.I2 ^ branch.Sign);
return (ULONG&)branch;
}
LiteralLoad8Target target;
ZeroMemory(&target, sizeof(target));
target.Imm8 = load.Imm8;
return (LONG&)target;
}
LiteralLoad12Target target;
ZeroMemory(&target, sizeof(target));
target.Imm12 = load.Imm12;
return (LONG&)target;
}
return size;
}
if (instruction) {
// Copy the 16 bit instruction over
*(PUSHORT)(pDest) = instruction;
return sizeof(USHORT); // The source instruction was 16 bits
}
// Store where we will be writing our conditional branch, and move past it so we can emit a long branch
PUSHORT pDstInst = (PUSHORT)(pDest);
PUSHORT pConditionalBranchInstruction = pDstInst++;
// Invert the CBZ/CBNZ instruction to move past our 'long branch' if the inverse comparison succeeds
// Write the CBZ/CBNZ instruction *before* the long branch we emitted above
// This had to be done out of order, since the size of a long branch can vary due to alignment restrictions
instruction = EncodeBranch5(*(PUSHORT)(pSource), longBranchSize -
c_PCAdjust + sizeof(USHORT));
Branch5& branch = (Branch5&)(instruction);
branch.OpCode = (branch.OpCode & 0x02) ? 0x2C : 0x2E; // Invert the
CBZ/CBNZ comparison
*pConditionalBranchInstruction = instruction;
// ADD/SUB, SXTH, SXTB, UXTH, UXTB, CBZ, CBNZ, PUSH, POP, REV, REV15, REVSH, NOP, YIELD, WFE, WFI, SEV, etc.
return PureCopy16(pSource, pDest);
}
// Then, emit a 'long branch' that will be hit if the original condition is met
BYTE longBranchSize = EmitLongBranch(pDstInst, pTarget);
// Finally, encode and emit the unconditional branch that will be used to branch past the 'long branch' if the initial condition was not met
Branch11 branch11 = { 0x00, 0x1C };
newInstruction = EncodeBranch11(*(DWORD*)(&branch11), longBranchSize -
c_PCAdjust + sizeof(USHORT));
ASSERT(newInstruction);
*pUnconditionalBranchInstruction = newInstruction;
// BX
if ((instruction & 0xff80) == 0x4700) {
// The target is stored in a register
m_pbTarget = (PBYTE)DETOUR_INSTRUCTION_TARGET_DYNAMIC;
}
// Add/subtract register
// Add/subtract immediate
/* 0b00011 */ { 0x03, &CDetourDis::PureCopy16},
// Add/subtract/compare/move immediate
/* 0b00100 */ { 0x04, &CDetourDis::PureCopy16 },
/* 0b00101 */ { 0x05, &CDetourDis::PureCopy16 },
/* 0b00110 */ { 0x06, &CDetourDis::PureCopy16 },
/* 0b00111 */ { 0x07, &CDetourDis::PureCopy16 },
// Data-processing register
// Special data processing
// Branch/exchange instruction set
/* 0b01000 */ { 0x08, &CDetourDis::CopyBranchExchangeOrDataProcessing16 },
// Add to SP or PC
/* 0b10100 */ { 0x14, &CDetourDis::PureCopy16 },
// ToDo: Is ADR (T1) blitt-able?
// It adds a value to PC and stores the result in a register.
// Does this count as a 'target' for detours?
/* 0b10101 */ { 0x15, &CDetourDis::PureCopy16 },
// Miscellaneous
/* 0b10110 */ { 0x16, &CDetourDis::CopyMiscellaneous16 },
/* 0b10111 */ { 0x17, &CDetourDis::CopyMiscellaneous16 },
// Load/store multiple
/* 0b11000 */ { 0x18, &CDetourDis::PureCopy16 },
/* 0b11001 */ { 0x19, &CDetourDis::PureCopy16 },
// ToDo: Are we sure these are all safe?
// LDMIA, for example, can include an 'embedded' branch.
// Does this count as a 'target' for detours?
// Conditional branch
/* 0b11010 */ { 0x1a, &CDetourDis::CopyConditionalBranchOrOther16 },
// Conditional branch
// Undefined instruction
// Service (system) call
/* 0b11011 */ { 0x1b, &CDetourDis::CopyConditionalBranchOrOther16 },
// Unconditional branch
/* 0b11100 */ { 0x1c, &CDetourDis::CopyUnConditionalBranch16 },
// 32-bit instruction
/* 0b11101 */ { 0x1d, &CDetourDis::BeginCopy32 },
/* 0b11110 */ { 0x1e, &CDetourDis::BeginCopy32 },
/* 0b11111 */ { 0x1f, &CDetourDis::BeginCopy32 },
{ 0, NULL }
};
// Re-encode as 32-bit
PUSHORT pDstInst = (PUSHORT)(pDest);
LONG newDelta = CalculateNewDelta(pTarget, pDest);
instruction = EncodeBranch24(instruction, newDelta, fLink);
if (instruction) {
// Copy both bytes of the instruction
EmitLongInstruction(pDstInst, instruction);
return sizeof(DWORD);
}
// Re-encode as 32-bit
PUSHORT pDstInst = (PUSHORT)(pDest);
LONG newDelta = CalculateNewDelta(pTarget, pDest);
instruction = EncodeBranch20(instruction, newDelta);
if (instruction) {
// Copy both bytes of the instruction
EmitLongInstruction(pDstInst, instruction);
return sizeof(DWORD);
}
// Then, emit a 'long branch' that will be hit if the original condition is met
BYTE longBranchSize = EmitLongBranch(pDstInst, pTarget);
// Finally, encode and emit the unconditional branch that will be used
// to branch past the 'long branch' if the initial condition was not met
Branch11 branch11 = { 0x00, 0x1C };
instruction = EncodeBranch11(*(DWORD*)(&branch11), longBranchSize -
c_PCAdjust + sizeof(USHORT));
ASSERT(instruction);
*pUnconditionalBranchInstruction = static_cast<USHORT>(instruction);
*--((PULONG&)m_pbPool) = (ULONG)(size_t)DETOURS_PBYTE_TO_PFUNC(pTarget);
// If the base register is anything other than PC, we can simply copy the instruction
if (tableBranch.BaseRegister != c_PC) {
return PureCopy32(pSource, pDest);
}
__debugbreak();
// If the base register is PC, we need to manually perform the table lookup
// For example, this:
//
// 7ef40000 e8dff002 tbb [pc,r2]
//
// becomes this:
//
// 7ef40404 b401 push {r0} ; pushed as a placeholder for the target address
// 7ef40406 e92d0005 push.w {r0,r2} ; scratch register and another register are pushed; there's a minimum of two registers in the list for push.w
// 7ef40410 4820 ldr r0,=0x7EF40004 ; load the table address from the literal pool
// 7ef40414 eb000042 add r0,r0,r2,lsl #1 ; add the index value to the address of the table to get the table entry; lsl only used if it's a TBH instruction
// 7ef40418 f8d00000 ldr.w r0,[r0] ; dereference the table entry to get the value of the target
// 7ef4041c ea4f0040 lsl r0,r0,#1 ; multiply the offset by 2 (per the spec)
// 7ef40420 eb00000f add.w r0,r0,pc ; Add the offset to pc to get the target address
// 7ef40424 f8cd000c str.w r0,[sp,#0xC] ; store the target address on the stack (into the first push)
// 7ef40428 e8bd0005 pop.w {r0,r2} ; scratch register and another register are popped; there's a minimum of two registers in the list for pop.w
// 7ef4042c bd00 pop {pc} ; pop the address into pc
//
// Load the literal pool value into our scratch register (this contains the address of the branch table)
// ldr rn, target
EmitLiteralLoad8(pDstInst, scrReg, m_pbPool);
// Add the index offset to the address of the branch table; the result will be the value within the table that contains the branch offset
// We need to multiply the index by two if we are using halfword indexing
// Will shift tableBranch.IndexRegister by 1 (multiply by 2) if using a TBH
EmitAdd32(pDstInst, scrReg, tableBranch.IndexRegister, scrReg,
tableBranch.HalfWord);
// Multiply the offset by two to get the true offset value (as per the spec)
EmitLogicalShiftLeft32(pDstInst, scrReg, scrReg, 1);
// Now write the contents of scrReg to the stack, so we can pop it into PC
// Write the address of the branch table entry to the stack, so we can pop it into PC
EmitStoreImmediate12(pDstInst, scrReg, c_SP, sizeof(DWORD) * 3);
// Pop PC
*pDstInst++ = 0xbd00;
// Immediate data processing instructions; ADD, SUB, MOV, MOVN, ADR, MOVT, BFC, SSAT16, etc.
if ((instruction & 0xF8008000) == 0xF0000000) { //
11110xxxxxxxxxxx0xxxxxxxxxxxxxxx
// Should all be blitt-able
// ToDo: What about ADR? Is it safe to do a straight-copy?
// ToDo: Not handling moves to or from PC
return PureCopy32(pSource, pDest);
}
// Table branch
if ((instruction & 0x1000F0) == 0x100000 || // TBB :
xxxxxxxxxxx1xxxxxxxxxxxx0000xxxx
(instruction & 0x1000F0) == 0x100010) { // TBH :
xxxxxxxxxxx1xxxxxxxxxxxx0001xxxx
return CopyTableBranch(pSource, pDest);
}
// Coprocessor instructions
if ((instruction & 0xEC000000) == 0xEC000000) { //
111x11xxxxxxxxxxxxxxxxxxxxxxxxxx
return PureCopy32(pSource, pDest);
}
pSrc += size;
return pSrc;
}
#endif // DETOURS_ARM
#ifdef DETOURS_ARM64
//
// Problematic instructions:
//
// ADR 0ll10000 hhhhhhhh hhhhhhhh hhhddddd & 0x9f000000 == 0x10000000 (l = low, h = high, d = Rd)
// ADRP 1ll10000 hhhhhhhh hhhhhhhh hhhddddd & 0x9f000000 == 0x90000000 (l = low, h = high, d = Rd)
//
// B.cond 01010100 iiiiiiii iiiiiiii iii0cccc & 0xff000010 == 0x54000000 (i = delta = SignExtend(imm19:00, 64), c = cond)
//
// B 000101ii iiiiiiii iiiiiiii iiiiiiii & 0xfc000000 == 0x14000000 (i = delta = SignExtend(imm26:00, 64))
// BL 100101ii iiiiiiii iiiiiiii iiiiiiii & 0xfc000000 == 0x94000000 (i = delta = SignExtend(imm26:00, 64))
//
// CBNZ z0110101 iiiiiiii iiiiiiii iiittttt & 0x7f000000 == 0x35000000 (z = size, i = delta = SignExtend(imm19:00, 64), t = Rt)
// CBZ z0110100 iiiiiiii iiiiiiii iiittttt & 0x7f000000 == 0x34000000 (z = size, i = delta = SignExtend(imm19:00, 64), t = Rt)
//
// LDR Wt 00011000 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x18000000 (i = SignExtend(imm19:00, 64), t = Rt)
// LDR Xt 01011000 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x58000000 (i = SignExtend(imm19:00, 64), t = Rt)
// LDRSW 10011000 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x98000000 (i = SignExtend(imm19:00, 64), t = Rt)
// PRFM 11011000 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0xd8000000 (i = SignExtend(imm19:00, 64), t = Rt)
// LDR St 00011100 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x1c000000 (i = SignExtend(imm19:00, 64), t = Rt)
// LDR Dt 01011100 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x5c000000 (i = SignExtend(imm19:00, 64), t = Rt)
// LDR Qt 10011100 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0x9c000000 (i = SignExtend(imm19:00, 64), t = Rt)
// LDR inv 11011100 iiiiiiii iiiiiiii iiittttt & 0xff000000 == 0xdc000000 (i = SignExtend(imm19:00, 64), t = Rt)
//
// TBNZ z0110111 bbbbbiii iiiiiiii iiittttt & 0x7f000000 == 0x37000000 (z = size, b = bitnum, i = SignExtend(imm14:00, 64), t = Rt)
// TBZ z0110110 bbbbbiii iiiiiiii iiittttt & 0x7f000000 == 0x36000000 (z = size, b = bitnum, i = SignExtend(imm14:00, 64), t = Rt)
//
class CDetourDis
{
public:
CDetourDis();
public:
typedef BYTE (CDetourDis::* COPYFUNC)(PBYTE pbDst, PBYTE pbSrc);
// Encoder for an ADD (immediate) instruction word. The word can be read whole
// through Assembled or field-by-field through s.
union AddImm12
{
    DWORD Assembled;
    struct
    {
        DWORD Rd : 5;           // Destination register
        DWORD Rn : 5;           // Source register
        DWORD Imm12 : 12;       // 12-bit immediate
        DWORD Shift : 2;        // Shift selector (must be 0 or 1)
        DWORD Opcode1 : 7;      // Fixed pattern 0010001 == 0x11
        DWORD Size : 1;         // 0 = 32-bit, 1 = 64-bit
    } s;

    // Packs the supplied fields into one instruction word. Only the low
    // 12 bits of imm are kept. Every bit-field is assigned, so the whole
    // word is defined on return.
    static DWORD Assemble(DWORD size, DWORD rd, DWORD rn, ULONG imm, DWORD shift)
    {
        AddImm12 word;
        word.s.Size = size;
        word.s.Opcode1 = 0x11;
        word.s.Shift = shift;
        word.s.Imm12 = imm & 0xfff;
        word.s.Rn = rn;
        word.s.Rd = rd;
        return word.Assembled;
    }

    // 32-bit form (Size == 0).
    static DWORD AssembleAdd32(DWORD rd, DWORD rn, ULONG imm, DWORD shift)
    {
        return Assemble(0, rd, rn, imm, shift);
    }

    // 64-bit form (Size == 1).
    static DWORD AssembleAdd64(DWORD rd, DWORD rn, ULONG imm, DWORD shift)
    {
        return Assemble(1, rd, rn, imm, shift);
    }
};
// Decoder/encoder for ADR and ADRP instruction words. The 21-bit immediate is
// stored split: its two low bits in Imm2 and the remaining 19 bits in Imm19.
union Adr19
{
    DWORD Assembled;
    struct
    {
        DWORD Rd : 5;           // Destination register
        DWORD Imm19 : 19;       // Upper 19 bits of the immediate
        DWORD Opcode1 : 5;      // Fixed pattern 10000 == 0x10
        DWORD Imm2 : 2;         // Lower 2 bits of the immediate
        DWORD Type : 1;         // 0 = ADR, 1 = ADRP
    } s;

    // Returns the immediate as a signed value: Imm19:Imm2 is rebuilt as a
    // 21-bit quantity, then sign-extended by shifting it to the top of the
    // 32-bit word and back down.
    inline LONG Imm() const
    {
        const DWORD packed = (s.Imm19 << 2) | s.Imm2;
        const LONG atTop = static_cast<LONG>(packed << 11);
        return atTop >> 11;
    }

    // Packs the fields into one instruction word. The low two bits of delta
    // go to Imm2 and the next 19 bits go to Imm19.
    static DWORD Assemble(DWORD type, DWORD rd, LONG delta)
    {
        Adr19 word;
        word.s.Type = type;
        word.s.Imm2 = delta & 3;
        word.s.Opcode1 = 0x10;
        word.s.Imm19 = (delta >> 2) & 0x7ffff;
        word.s.Rd = rd;
        return word.Assembled;
    }

    // ADR form (Type == 0).
    static DWORD AssembleAdr(DWORD rd, LONG delta)
    {
        return Assemble(0, rd, delta);
    }

    // ADRP form (Type == 1).
    static DWORD AssembleAdrp(DWORD rd, LONG delta)
    {
        return Assemble(1, rd, delta);
    }
};
// Decoder/encoder for a conditional branch (B.cond) instruction word.
union Bcc19
{
    DWORD Assembled;
    struct
    {
        DWORD Condition : 4;    // Condition code
        DWORD Opcode1 : 1;      // Fixed 0
        DWORD Imm19 : 19;       // 19-bit immediate (branch offset / 4)
        DWORD Opcode2 : 8;      // Fixed pattern 01010100 == 0x54
    } s;

    // Returns the branch offset in bytes. The 19-bit field is moved to the
    // top of the word and shifted back two bits short, which sign-extends it
    // and multiplies it by 4 in one step.
    inline LONG Imm() const
    {
        const LONG atTop = static_cast<LONG>(s.Imm19 << 13);
        return atTop >> 11;
    }

    // Packs a B.cond word for the given condition and byte offset. The offset
    // is stored divided by 4 and truncated to 19 bits by the bit-field.
    static DWORD AssembleBcc(DWORD condition, LONG delta)
    {
        Bcc19 word;
        word.s.Opcode2 = 0x54;
        word.s.Imm19 = delta >> 2;
        word.s.Opcode1 = 0;
        word.s.Condition = condition;
        return word.Assembled;
    }
};
// Decoder/encoder for the unconditional immediate branches B and BL.
union Branch26
{
    DWORD Assembled;
    struct
    {
        DWORD Imm26 : 26;       // 26-bit immediate (branch offset / 4)
        DWORD Opcode1 : 5;      // Fixed pattern 00101 == 0x5
        DWORD Link : 1;         // 0 = B, 1 = BL
    } s;

    // Returns the branch offset in bytes: the 26-bit field is sign-extended
    // and scaled by 4 using a shift up to the top bit and back.
    inline LONG Imm() const
    {
        const LONG atTop = static_cast<LONG>(s.Imm26 << 6);
        return atTop >> 4;
    }

    // Packs a B/BL word. delta is a byte offset, stored divided by 4 and
    // truncated to 26 bits by the bit-field.
    static DWORD Assemble(DWORD link, LONG delta)
    {
        Branch26 word;
        word.s.Link = link;
        word.s.Opcode1 = 0x5;
        word.s.Imm26 = delta >> 2;
        return word.Assembled;
    }

    // Plain branch (Link == 0).
    static DWORD AssembleB(LONG delta)
    {
        return Assemble(0, delta);
    }

    // Branch with link (Link == 1).
    static DWORD AssembleBl(LONG delta)
    {
        return Assemble(1, delta);
    }
};
// Encoder for the register-indirect branches: "Br" (plain) and "Brl"
// (branch with link), both taking the target address from register Rn.
union Br
{
    DWORD Assembled;
    struct
    {
        DWORD Opcode1 : 5;      // Must be 00000 == 0
        DWORD Rn : 5;           // Register number
        DWORD Opcode2 : 22;     // Must be 1101011000011111000000 == 0x3587c0 for Br
                                //                                   0x358fc0 for Brl
    } s;

    // Packs a branch-to-register word. The two variants differ only in one
    // bit of Opcode2 (0x00200000 in the assembled word), so the variant is
    // chosen by picking the matching Opcode2 constant.
    static DWORD Assemble(DWORD rn, bool link)
    {
        Br temp;
        temp.s.Opcode1 = 0;
        temp.s.Rn = rn;
        temp.s.Opcode2 = link ? 0x358fc0 : 0x3587c0;
        return temp.Assembled;
    }

    // Plain branch to the address held in register rn.
    static DWORD AssembleBr(DWORD rn)
    {
        return Assemble(rn, false);
    }

    // Branch-with-link to the address held in register rn.
    static DWORD AssembleBrl(DWORD rn)
    {
        return Assemble(rn, true);
    }
};
// Decoder/encoder for the compare-and-branch instructions CBZ and CBNZ.
union Cbz19
{
    DWORD Assembled;
    struct
    {
        DWORD Rt : 5;           // Register to test
        DWORD Imm19 : 19;       // 19-bit immediate (branch offset / 4)
        DWORD Nz : 1;           // 0 = CBZ, 1 = CBNZ
        DWORD Opcode1 : 6;      // Fixed pattern 011010 == 0x1a
        DWORD Size : 1;         // 0 = 32-bit, 1 = 64-bit
    } s;

    // Returns the branch offset in bytes: the 19-bit field is sign-extended
    // and scaled by 4 using a shift up to the top bit and back.
    inline LONG Imm() const
    {
        const LONG atTop = static_cast<LONG>(s.Imm19 << 13);
        return atTop >> 11;
    }

    // Packs a CBZ/CBNZ word. delta is a byte offset, stored divided by 4 and
    // truncated to 19 bits by the bit-field.
    static DWORD Assemble(DWORD size, DWORD nz, DWORD rt, LONG delta)
    {
        Cbz19 word;
        word.s.Size = size;
        word.s.Opcode1 = 0x1a;
        word.s.Nz = nz;
        word.s.Imm19 = delta >> 2;
        word.s.Rt = rt;
        return word.Assembled;
    }
};
// Decoder/encoder for the PC-relative "load literal" family
// (LDR Wt/Xt/St/Dt/Qt, LDRSW, PRFM).
union LdrLit19
{
    DWORD Assembled;
    struct
    {
        DWORD Rt : 5;           // Destination register
        DWORD Imm19 : 19;       // 19-bit immediate
        DWORD Opcode1 : 2;      // Must be 0
        DWORD FpNeon : 1;       // 0 = LDR Wt/LDR Xt/LDRSW/PRFM, 1 = LDR St/LDR Dt/LDR Qt
        DWORD Opcode2 : 3;      // Must be 011 = 3
        DWORD Size : 2;         // 00 = LDR Wt/LDR St, 01 = LDR Xt/LDR Dt,
                                // 10 = LDRSW/LDR Qt, 11 = PRFM/invalid
    } s;

    // Returns the literal's offset in bytes: the 19-bit field is shifted to
    // the top of the word and back two bits short, which sign-extends it and
    // multiplies it by 4.
    inline LONG Imm() const { return (LONG)(s.Imm19 << 13) >> 11; }

    // Packs a load-literal word. delta is a byte offset, stored divided by 4
    // and truncated to 19 bits by the bit-field. Every bit-field is
    // assigned, so the whole word is defined on return.
    static DWORD Assemble(DWORD size, DWORD fpneon, DWORD rt, LONG delta)
    {
        LdrLit19 temp;
        temp.s.Rt = rt;
        temp.s.Imm19 = delta >> 2;
        temp.s.Opcode1 = 0;
        temp.s.FpNeon = fpneon;
        temp.s.Opcode2 = 3;
        temp.s.Size = size;
        return temp.Assembled;
    }
};
// Encoder for an FP/NEON register load from [Rn + immediate].
// Despite the "Imm9" in the name, the immediate field declared here is
// 12 bits wide.
union LdrFpNeonImm9
{
    DWORD Assembled;
    struct
    {
        DWORD Rt : 5;           // Destination register
        DWORD Rn : 5;           // Base register
        DWORD Imm12 : 12;       // 12-bit immediate
        DWORD Opcode1 : 1;      // Must be 1 == 1
        DWORD Opc : 1;          // Part of size
        DWORD Opcode2 : 6;      // Must be 111101 == 0x3d
        DWORD Size : 2;         // Size (0=8-bit, 1=16-bit, 2=32-bit, 3=64-bit,
                                // 4=128-bit; the 4 is carried by Opc, see Assemble)
    } s;

    // Packs a load word. The 3-bit logical size (0..4, per the table on the
    // Size field) is split across two fields: bit 2 goes to Opc and the low
    // two bits go to Size, so 128-bit (4) encodes as Opc = 1, Size = 0.
    static DWORD Assemble(DWORD size, DWORD rt, DWORD rn, ULONG imm)
    {
        LdrFpNeonImm9 temp;
        temp.s.Rt = rt;
        temp.s.Rn = rn;
        temp.s.Imm12 = imm;
        temp.s.Opcode1 = 1;
        temp.s.Opc = size >> 2;
        temp.s.Opcode2 = 0x3d;
        temp.s.Size = size & 3;
        return temp.Assembled;
    }
};
// Encoder for the 16-bit move-immediate instructions MOVN, MOVZ and MOVK.
union Mov16
{
    DWORD Assembled;
    struct
    {
        DWORD Rd : 5;           // Destination register
        DWORD Imm16 : 16;       // 16-bit immediate
        DWORD Shift : 2;        // Shift selector (0=0, 1=16, 2=32, 3=48)
        DWORD Opcode : 6;       // Fixed pattern 100101 == 0x25
        DWORD Type : 2;         // 0 = MOVN, 1 = reserved, 2 = MOVZ, 3 = MOVK
        DWORD Size : 1;         // 0 = 32-bit, 1 = 64-bit
    } s;

    // Packs the supplied fields into one instruction word. Every bit-field
    // is assigned, so the whole word is defined on return.
    static DWORD Assemble(DWORD size, DWORD type, DWORD rd, DWORD imm, DWORD shift)
    {
        Mov16 word;
        word.s.Size = size;
        word.s.Type = type;
        word.s.Opcode = 0x25;
        word.s.Shift = shift;
        word.s.Imm16 = imm;
        word.s.Rd = rd;
        return word.Assembled;
    }

    // MOVN (Type 0), 32-bit and 64-bit.
    static DWORD AssembleMovn32(DWORD rd, DWORD imm, DWORD shift)
    {
        return Assemble(0, 0, rd, imm, shift);
    }
    static DWORD AssembleMovn64(DWORD rd, DWORD imm, DWORD shift)
    {
        return Assemble(1, 0, rd, imm, shift);
    }

    // MOVZ (Type 2), 32-bit and 64-bit.
    static DWORD AssembleMovz32(DWORD rd, DWORD imm, DWORD shift)
    {
        return Assemble(0, 2, rd, imm, shift);
    }
    static DWORD AssembleMovz64(DWORD rd, DWORD imm, DWORD shift)
    {
        return Assemble(1, 2, rd, imm, shift);
    }

    // MOVK (Type 3), 32-bit and 64-bit.
    static DWORD AssembleMovk32(DWORD rd, DWORD imm, DWORD shift)
    {
        return Assemble(0, 3, rd, imm, shift);
    }
    static DWORD AssembleMovk64(DWORD rd, DWORD imm, DWORD shift)
    {
        return Assemble(1, 3, rd, imm, shift);
    }
};
// Decoder/encoder for the test-bit-and-branch instructions TBZ and TBNZ.
union Tbz14
{
    DWORD Assembled;
    struct
    {
        DWORD Rt : 5;           // Register to test
        DWORD Imm14 : 14;       // 14-bit immediate (branch offset / 4)
        DWORD Bit : 5;          // 5-bit index of the bit to test
        DWORD Nz : 1;           // 0 = TBZ, 1 = TBNZ
        DWORD Opcode1 : 6;      // Fixed pattern 011011 == 0x1b
        DWORD Size : 1;         // 0 = 32-bit, 1 = 64-bit
    } s;

    // Returns the branch offset in bytes: the 14-bit field is sign-extended
    // and scaled by 4 using a shift up to the top bit and back.
    inline LONG Imm() const
    {
        const LONG atTop = static_cast<LONG>(s.Imm14 << 18);
        return atTop >> 16;
    }

    // Packs a TBZ/TBNZ word. delta is a byte offset, stored divided by 4 and
    // truncated to 14 bits by the bit-field.
    static DWORD Assemble(DWORD size, DWORD nz, DWORD rt, DWORD bit, LONG delta)
    {
        Tbz14 word;
        word.s.Size = size;
        word.s.Opcode1 = 0x1b;
        word.s.Nz = nz;
        word.s.Bit = bit;
        word.s.Imm14 = delta >> 2;
        word.s.Rt = rt;
        return word.Assembled;
    }
};
protected:
// Per-instruction copy helpers. Each takes the source and destination
// instruction addresses and returns a BYTE; the opcode dispatch in this file
// stores that result in its CopiedSize local.

// Fallback chosen by the dispatch when none of the special-case opcode masks
// below match.
BYTE PureCopy32(BYTE* pSource, BYTE* pDest);
// Takes the destination instruction pointer by reference, a register number
// and a 64-bit immediate. The visible start of its definition splits the
// immediate into four 16-bit pieces; callers in this file pass rd = 17 and a
// target address. Presumably emits a MOV sequence - confirm against the
// full definition.
BYTE EmitMovImmediate(PULONG& pDstInst, BYTE rd, UINT64 immediate);
// Dispatched when (Instruction & 0x1f000000) == 0x10000000 (ADR/ADRP).
BYTE CopyAdr(BYTE* pSource, BYTE* pDest, ULONG instruction);
// Dispatched when (Instruction & 0xff000010) == 0x54000000 (B.cond).
BYTE CopyBcc(BYTE* pSource, BYTE* pDest, ULONG instruction);
// NOTE(review): the dispatch visible in this file calls CopyB_or_Bl directly;
// how CopyB and CopyBl relate to it is not visible here.
BYTE CopyB(BYTE* pSource, BYTE* pDest, ULONG instruction);
BYTE CopyBl(BYTE* pSource, BYTE* pDest, ULONG instruction);
// Dispatched when (Instruction & 0x7c000000) == 0x14000000 (B/BL); the
// dispatch passes link = true when bit 31 of the instruction is set.
BYTE CopyB_or_Bl(BYTE* pSource, BYTE* pDest, ULONG instruction, bool link);
// Dispatched when (Instruction & 0x7e000000) == 0x34000000 (CBZ/CBNZ).
BYTE CopyCbz(BYTE* pSource, BYTE* pDest, ULONG instruction);
// Dispatched when (Instruction & 0x7e000000) == 0x36000000 (TBZ/TBNZ).
BYTE CopyTbz(BYTE* pSource, BYTE* pDest, ULONG instruction);
// Dispatched when (Instruction & 0x3b000000) == 0x18000000 (load literal).
BYTE CopyLdrLiteral(BYTE* pSource, BYTE* pDest, ULONG instruction);
protected:
// Reads the instruction word stored at pSource by viewing the byte pointer
// as a pointer to ULONG and returning its first element.
ULONG GetInstruction(BYTE* pSource)
{
    const PULONG pWord = reinterpret_cast<PULONG>(pSource);
    return *pWord;
}
ULONG CopiedSize;
if ((Instruction & 0x1f000000) == 0x10000000) {
CopiedSize = CopyAdr(pSrc, pDst, Instruction);
} else if ((Instruction & 0xff000010) == 0x54000000) {
CopiedSize = CopyBcc(pSrc, pDst, Instruction);
} else if ((Instruction & 0x7c000000) == 0x14000000) {
CopiedSize = CopyB_or_Bl(pSrc, pDst, Instruction, (Instruction &
0x80000000) != 0);
} else if ((Instruction & 0x7e000000) == 0x34000000) {
CopiedSize = CopyCbz(pSrc, pDst, Instruction);
} else if ((Instruction & 0x7e000000) == 0x36000000) {
CopiedSize = CopyTbz(pSrc, pDst, Instruction);
} else if ((Instruction & 0x3b000000) == 0x18000000) {
CopiedSize = CopyLdrLiteral(pSrc, pDst, Instruction);
} else {
CopiedSize = PureCopy32(pSrc, pDst);
}
return pSrc + 4;
}
BYTE CDetourDis::EmitMovImmediate(PULONG& pDstInst, BYTE rd, UINT64 immediate)
{
DWORD piece[4];
piece[3] = (DWORD)((immediate >> 48) & 0xffff);
piece[2] = (DWORD)((immediate >> 32) & 0xffff);
piece[1] = (DWORD)((immediate >> 16) & 0xffff);
piece[0] = (DWORD)((immediate >> 0) & 0xffff);
int count = 0;
// ADR case
if (decoded.s.Type == 0)
{
BYTE* pTarget = pSource + decoded.Imm();
LONG64 delta = pTarget - pDest;
LONG64 deltaPage = ((ULONG_PTR)pTarget >> 12) - ((ULONG_PTR)pDest >> 12);
// output as ADR
if (delta >= -(1 << 20) && delta < (1 << 20))
{
EmitInstruction(pDstInst, Adr19::AssembleAdr(decoded.s.Rd,
(LONG)delta));
}
// ADRP case
else
{
BYTE* pTarget = (BYTE*)((((ULONG_PTR)pSource >> 12) + decoded.Imm()) <<
12);
LONG64 deltaPage = ((ULONG_PTR)pTarget >> 12) - ((ULONG_PTR)pDest >> 12);
// output as ADRP
if (deltaPage >= -(1 << 20) && (deltaPage < (1 << 20)))
{
EmitInstruction(pDstInst, Adr19::AssembleAdrp(decoded.s.Rd,
(LONG)deltaPage));
}
// output as BCC
if (delta >= -(1 << 20) && delta < (1 << 20))
{
EmitInstruction(pDstInst, Bcc19::AssembleBcc(decoded.s.Condition,
(LONG)delta));
}
// output as MOV x17, Target; BCC <skip>; BR x17 (BIG assumption that x17 isn't
// being used for anything!!)
else
{
EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget);
EmitInstruction(pDstInst, Bcc19::AssembleBcc(decoded.s.Condition ^ 1, 8));
EmitInstruction(pDstInst, Br::AssembleBr(17));
}
// output as B or BRL
if (delta >= -(1 << 27) && (delta < (1 << 27)))
{
EmitInstruction(pDstInst, Branch26::Assemble(link, (LONG)delta));
}
// output as MOV x17, Target; BR or BRL x17 (BIG assumption that x17 isn't
// being used for anything!!)
else
{
EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget);
EmitInstruction(pDstInst, Br::Assemble(17, link));
}
// output as CBZ/NZ
if (delta >= -(1 << 20) && delta < (1 << 20))
{
EmitInstruction(pDstInst, Cbz19::Assemble(decoded.s.Size, decoded.s.Nz,
decoded.s.Rt, (LONG)delta));
}
// output as MOV x17, Target; CBNZ/Z <skip>; BR x17 (BIG assumption that x17
// isn't being used for anything!!)
else
{
EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget);
EmitInstruction(pDstInst, Cbz19::Assemble(decoded.s.Size, decoded.s.Nz ^ 1,
decoded.s.Rt, 8));
EmitInstruction(pDstInst, Br::AssembleBr(17));
}
// output as MOV x17, Target; TBNZ/Z <skip>; BR x17 (BIG assumption that x17
// isn't being used for anything!!)
else
{
EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget);
EmitInstruction(pDstInst, Tbz14::Assemble(decoded.s.Size, decoded.s.Nz ^ 1,
decoded.s.Rt, decoded.s.Bit, 8));
EmitInstruction(pDstInst, Br::AssembleBr(17));
}
// output as LDR
if (delta >= -(1 << 21) && delta < (1 << 21))
{
EmitInstruction(pDstInst, LdrLit19::Assemble(decoded.s.Size,
decoded.s.FpNeon, decoded.s.Rt, (LONG)delta));
}
// FP/NEON register: compute address in x17 and load from there (BIG assumption
// that x17 isn't being used for anything!!)
else
{
EmitMovImmediate(pDstInst, 17, (ULONG_PTR)pTarget);
EmitInstruction(pDstInst, LdrFpNeonImm9::Assemble(2 + decoded.s.Size,
decoded.s.Rt, 17, 0));
}
CDetourDis state;
return (PVOID)state.CopyInstruction((PBYTE)pDst,
(PBYTE)pSrc,
(PBYTE*)ppTarget,
plExtra);
}
#endif // DETOURS_ARM64
if (hModule != NULL) {
ULONG cbModule = DetourGetModuleSize(hModule);
pbBeg = (PBYTE)hModule;
pbEnd = (PBYTE)hModule + cbModule;
}
//
///////////////////////////////////////////////////////////////// End of File.