使用LLVM的MC库进行反汇编 - vimacs - 03-18-2021
最近想给一个项目加入反汇编功能,由于我们用的反汇编器是LLVM实现的,所以就学习一下LLVM的MCDisassembler。以下代码用的是C的接口。
代码: // gcc -g -Wall -o disas-c disas-c.c -lLLVM
#include <llvm-c/Disassembler.h>
#include <llvm-c/Target.h>
#include <stdio.h>
/*
 * Disassembles the first instruction of a fixed x86-64 byte sequence with the
 * LLVM-C disassembler API and prints the text plus the number of bytes consumed.
 *
 * Returns 0 on success, 1 if the disassembler cannot be created or the bytes
 * do not decode to an instruction.
 */
int main(void)
{
    // instruction bytes, but only one instruction is disassembled
    uint8_t instBytes[] = { 0x48, 0x31, 0xc0, 0xcc }; // xor rax, rax; int3
    // we need to reserve enough space, otherwise the disasm output will be truncated
    // (zero-initialised so the buffer always holds a valid C string)
    char disasm[20] = "";

    // Initialize all the essential things
    LLVMInitializeX86TargetInfo();
    // We don't need LLVMInitializeX86Target();
    LLVMInitializeX86TargetMC();
    // We don't need LLVMInitializeX86AsmPrinter();
    LLVMInitializeX86Disassembler();

    LLVMDisasmContextRef disassembler = LLVMCreateDisasm("x86_64", NULL, 0, NULL, NULL);
    if (!disassembler) {
        fprintf(stderr, "Failed to create a disassembler.\n");
        return 1;
    }

    // use Intel syntax, default is AT&T
    // A failure here is only reported: the program continues with whatever
    // syntax the disassembler currently uses.
    if (!LLVMSetDisasmOptions(disassembler, LLVMDisassembler_Option_AsmPrinterVariant)) {
        fprintf(stderr, "Failed to set disassembler printer variant.\n");
    }

    // Pass the real buffer size instead of repeating the literal, so the two
    // can never drift apart.
    size_t disasm_bytes = LLVMDisasmInstruction(disassembler, instBytes, sizeof(instBytes),
                                                /*pc*/ 0, disasm, sizeof(disasm));
    if (disasm_bytes == 0) {
        // Zero means no instruction was decoded; do not print the output buffer.
        fprintf(stderr, "Failed to disassemble the instruction.\n");
        LLVMDisasmDispose(disassembler);
        return 1;
    }

    printf("%s\n", disasm);
    // %zu is the matching conversion for size_t (%ld is not portable).
    printf("%zu bytes disassembled.\n", disasm_bytes);

    // free the Disasm context
    LLVMDisasmDispose(disassembler);
    return 0;
}
RE: 使用LLVM的MC库进行反汇编 - vimacs - 03-22-2021
下面是C++版本,参考的是 llvm/lib/MC/MCDisassembler/Disassembler.cpp 里面反汇编C接口的实现和 llvm/tools/llvm-objdump/llvm-objdump.cpp 反汇编功能的实现。
代码: // g++ -g -Wall -o disas disas.cc -lLLVM
#include <iostream>
#include <memory>
#include <string>
// TargetRegistry::LookupTarget
// class Target
#include <llvm/Support/TargetRegistry.h>
// LLVMInitializeX86TargetInfo
#include <llvm/Support/TargetSelect.h>
#include <llvm/MC/MCRegisterInfo.h>
#include <llvm/MC/MCAsmInfo.h>
#include <llvm/MC/MCContext.h>
#include <llvm/MC/MCDisassembler/MCDisassembler.h>
#include <llvm/MC/MCInst.h>
#include <llvm/ADT/ArrayRef.h>
#include <llvm/ADT/SmallVector.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/MC/MCInstrInfo.h>
#include <llvm/MC/MCInstPrinter.h>
int main()
{
LLVMInitializeX86TargetInfo();
// needed to create MCSubtargetInfo
LLVMInitializeX86TargetMC();
// MCContext, Disassembler
LLVMInitializeX86Disassembler();
std::string Error;
const llvm::Target *TheTarget = llvm::TargetRegistry::lookupTarget("x86_64", Error);
if (!TheTarget) {
std::cerr << Error << std::endl;
return 1;
}
std::unique_ptr<const llvm::MCSubtargetInfo> STI(TheTarget->createMCSubtargetInfo("x86_64", "", ""));
if (!STI) {
std::cerr << "Cannot create MCSubtargetInfo." << std::endl;
return 1;
}
// setup MCContext
std::unique_ptr<const llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo("x86_64"));
if (!MRI) {
std::cerr << "Cannot create MCRegInfo." << std::endl;
return 1;
}
// Get the assembler info needed to setup the MCContext.
llvm::MCTargetOptions MCOptions;
std::unique_ptr<const llvm::MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, "x86_64", MCOptions));
if (!MAI) {
std::cerr << "Cannot create MCAsmInfo." << std::endl;
return 1;
}
std::unique_ptr<llvm::MCContext> Ctx(new llvm::MCContext(MAI.get(), MRI.get(), nullptr));
if (!Ctx) {
std::cerr << "Cannot create MCContext." << std::endl;
return 1;
}
std::unique_ptr<llvm::MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI, *Ctx));
if (!DisAsm) {
std::cerr << "Cannot create MCContext." << std::endl;
return 1;
}
uint8_t instBytes[] = { 0x48, 0x31, 0xc0, 0xcc }; // xor rax, rax; int3
llvm::ArrayRef<uint8_t> Data(instBytes, 4);
llvm::MCInst Inst;
uint64_t InstSize;
llvm::SmallVector<char, 64> InsnStr;
llvm::raw_svector_ostream Annotations(InsnStr);
llvm::MCDisassembler::DecodeStatus S = DisAsm->getInstruction(Inst, InstSize, Data, /*PC*/ 0, Annotations);
if (S == llvm::MCDisassembler::Success) {
std::cout << "Successfully disassembled " << InstSize << " bytes." << std::endl;
} else {
std::cerr << "Disassembler fails to get instruction." << std::endl;
return 1;
}
// Set up the instruction printer.
std::unique_ptr<const llvm::MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII) {
std::cerr << "Cannot create MCInstrInfo." << std::endl;
return 1;
}
// int AsmPrinterVariant = MAI->getAssemblerDialect();
int AsmPrinterVariant = 1; // Use Intel style, default is AT&T
std::unique_ptr<llvm::MCInstPrinter> IP(TheTarget->createMCInstPrinter(llvm::Triple("x86_64"), AsmPrinterVariant, *MAI, *MII, *MRI));
if (!IP) {
std::cerr << "Cannot create MCInstPrinter." << std::endl;
return 1;
}
std::string assembly;
llvm::raw_string_ostream asm_stream(assembly);
IP->printInst(&Inst, /*address*/ 0, Annotations.str(), *STI, asm_stream);
std::cout << assembly << std::endl;
}
RE: 使用LLVM的MC库进行反汇编 - vimacs - 03-23-2021
个人觉得C接口比较简单,而且API很稳定,只是反汇编输出最后存放在字符数组里,如果想要std::string的话还要再转换一次。
另外可以通过llvm-config获取编译和链接的命令行参数。
RE: 使用LLVM的MC库进行反汇编 - vimacs - 08-16-2022
刚刚试了一下在binutils里面用LLVM RISC-V反汇编库,用于实现某个自定义架构的反汇编功能。由于autotools用法比较复杂,所以我改成手动设置LIBS等变量。大概流程如下:
mkdir build; cd build
~/SourceCode/binutils-gdb/configure --target=some-target
make configure-zlib configure-libiberty configure-bfd configure-libctf
CPPFLAGS="$(llvm-config --cppflags)" make configure-opcodes
LIBS="$(llvm-config --link-static --libs --system-libs riscvdisassembler | xargs)" make configure-binutils
make all-zlib all-libiberty all-bfd all-libctf all-opcodes
make -C binutils CCLD=g++
|