LLVM從小白到放棄(一)- LLVM概述與LLVM環境搭建
LLVM的歷史
- LLVM計劃啓動於2000年,開始由美國UIUC大學的Chris Lattner博士主持開展,後來Apple也加入其中。最初的目的是開發一套提供中間代碼和編譯基礎設施的虛擬系統。
- LLVM命名最早源自於底層虛擬機(Low Level Virtual Machine)的縮寫,隨着LLVM項目的不斷發展,原先的全稱已不再適用,目前LLVM就是該項目的全稱。
什麼是LLVM
- 廣義:LLVM是一個包括了很多模塊的編譯器框架。
- 狹義:LLVM特指LLVM項目中的LLVM Core和Clang子模塊。
- 簡單來說,可以將LLVM理解成爲一個現代化、可擴展的編譯器。
GCC的編譯流程
- GCC分爲三個模塊:前端、優化器和後端
暫時無法在文檔外展示此內容
- LLVM本質上也是三段式:
暫時無法在文檔外展示此內容
LLVM的編譯流程
- 一個具體的例子:
暫時無法在文檔外展示此內容
LLVM相對於GCC的優勢
優勢1:模塊化
- LLVM:LLVM是高度模塊化設計的,每一個模塊都可以從LLVM項目中抽離出來單獨使用。
- GCC:而GCC雖然也是三段式編譯,但各個模塊之間是難以抽離出來單獨使用的。
優勢2:可擴展
- LLVM:LLVM爲開發者提供了豐富的API,例如開發者可以通過LLVM Pass框架干預中間代碼優化過程,並且配備了完善的文檔
- GCC:雖然GCC是開源的,但要在GCC的基礎上進行擴展門檻很高、難度很大
LLVM編譯過程總結
- 對於C/C++程序來說,LLVM的編譯過程如圖所示:
LLVM環境搭建
Ubuntu/LLVM/CMake版本
- Ubuntu 20.04
- LLVM 12.0.1 / 9.0.9svn(ndkr21e)
- CMake 3.21.1
第一步:下載LLVM-Core和Clang源代碼
https://github.com/llvm/llvm-project/releases/tag/llvmorg-12.0.1
clang-12.0.1.src.tar.xz
llvm-12.0.1.src.tar.xz
下載、解壓並重命名(將兩個源代碼目錄分別重命名爲 llvm 和 clang),然後在同一目錄下新建build文件夾,如下:
第二步:編譯LLVM項目
在同一文件夾內創建build.sh文件,內容如下:
cd build
cmake -G "Unix Makefiles" -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD="X86" -DBUILD_SHARED_LIBS=On ../llvm
make
make install
cmake 參數解釋:
- -G "Unix Makefiles":生成Unix下的Makefile
- -DLLVM_ENABLE_PROJECTS="clang":除了 LLVM Core 外,還需要編譯的子項目。
- -DCMAKE_BUILD_TYPE=Release:在cmake裏,有四種編譯模式:Debug, Release, RelWithDebInfo, 和MinSizeRel。使用 Release 模式編譯會節省很多空間。
- -DLLVM_TARGETS_TO_BUILD="X86":默認是ALL,選擇X86可節約很多編譯時間。
- -DBUILD_SHARED_LIBS=On:指定動態鏈接 LLVM 的庫,可以節省空間。
LLVM基本用法
第一步:將源代碼轉化成LLVM IR
#include "iostream"
using namespace std;
int main() {
cout << "Hello World!" << endl;
return 0;
}
LLVM IR 有兩種表現形式,一種是人類可閱讀的文本形式,對應文件後綴爲 .ll ;另一種是方便機器處理的二進制格式,對應文件後綴爲 .bc 。使用以下命令將源代碼轉化爲 LLVM IR:
clang -S -emit-llvm hello.cpp -o hello.ll
或
clang -c -emit-llvm hello.cpp -o hello.bc
; ModuleID = 'hello.cpp'
source_filename = "hello.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%"class.std::ios_base::Init" = type { i8 }
%"class.std::basic_ostream" = type { i32 (...)**, %"class.std::basic_ios" }
%"class.std::basic_ios" = type { %"class.std::ios_base", %"class.std::basic_ostream"*, i8, i8, %"class.std::basic_streambuf"*, %"class.std::ctype"*, %"class.std::num_put"*, %"class.std::num_get"* }
%"class.std::ios_base" = type { i32 (...)**, i64, i64, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"class.std::locale" }
%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"class.std::ios_base"*, i32)*, i32, i32 }
%"struct.std::ios_base::_Words" = type { i8*, i64 }
%"class.std::locale" = type { %"class.std::locale::_Impl"* }
%"class.std::locale::_Impl" = type { i32, %"class.std::locale::facet"**, i64, %"class.std::locale::facet"**, i8** }
%"class.std::locale::facet" = type <{ i32 (...)**, i32, [4 x i8] }>
%"class.std::basic_streambuf" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"class.std::locale" }
%"class.std::ctype" = type <{ %"class.std::locale::facet.base", [4 x i8], %struct.__locale_struct*, i8, [7 x i8], i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8, [6 x i8] }>
%"class.std::locale::facet.base" = type <{ i32 (...)**, i32 }>
%struct.__locale_struct = type { [13 x %struct.__locale_data*], i16*, i32*, i32*, [13 x i8*] }
%struct.__locale_data = type opaque
%"class.std::num_put" = type { %"class.std::locale::facet.base", [4 x i8] }
%"class.std::num_get" = type { %"class.std::locale::facet.base", [4 x i8] }
@_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1
@__dso_handle = external hidden global i8
@_ZSt4cout = external dso_local global %"class.std::basic_ostream", align 8
@.str = private unnamed_addr constant [13 x i8] c"Hello World!\00", align 1
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_hello.cpp, i8* null }]
; Function Attrs: noinline uwtable
define internal void @__cxx_global_var_init() #0 section ".text.startup" {
call void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"* nonnull dereferenceable(1) @_ZStL8__ioinit)
%1 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%"class.std::ios_base::Init"*)* @_ZNSt8ios_base4InitD1Ev to void (i8*)*), i8* getelementptr inbounds (%"class.std::ios_base::Init", %"class.std::ios_base::Init"* @_ZStL8__ioinit, i32 0, i32 0), i8* @__dso_handle) #3
ret void
}
declare dso_local void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"* nonnull dereferenceable(1)) unnamed_addr #1
; Function Attrs: nounwind
declare dso_local void @_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"* nonnull dereferenceable(1)) unnamed_addr #2
; Function Attrs: nounwind
declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #3
; Function Attrs: noinline norecurse optnone uwtable mustprogress
define dso_local i32 @main() #4 {
%1 = alloca i32, align 4
store i32 0, i32* %1, align 4
%2 = call nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(%"class.std::basic_ostream"* nonnull align 8 dereferenceable(8) @_ZSt4cout, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0))
%3 = call nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZNSolsEPFRSoS_E(%"class.std::basic_ostream"* nonnull dereferenceable(8) %2, %"class.std::basic_ostream"* (%"class.std::basic_ostream"*)* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_)
ret i32 0
}
declare dso_local nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(%"class.std::basic_ostream"* nonnull align 8 dereferenceable(8), i8*) #1
declare dso_local nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZNSolsEPFRSoS_E(%"class.std::basic_ostream"* nonnull dereferenceable(8), %"class.std::basic_ostream"* (%"class.std::basic_ostream"*)*) #1
declare dso_local nonnull align 8 dereferenceable(8) %"class.std::basic_ostream"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_(%"class.std::basic_ostream"* nonnull align 8 dereferenceable(8)) #1
; Function Attrs: noinline uwtable
define internal void @_GLOBAL__sub_I_hello.cpp() #0 section ".text.startup" {
call void @__cxx_global_var_init()
ret void
}
attributes #0 = { noinline uwtable "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind }
attributes #4 = { noinline norecurse optnone uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 12.0.1"}
第二步:優化LLVM IR
使用opt指令對LLVM IR進行優化
opt -load LLVMObfuscator.so -hlw -S hello.ll -o hello_opt.ll
- -load 加載特定的LLVM Pass(集合)進行優化(通常爲.so文件)
- -hlw是LLVM Pass中自定義的參數,用來指定使用哪個Pass進行優化
第三步:編譯LLVM IR爲可執行文件
這一步我們通過Clang完成。從LLVM IR到可執行文件中間還有一系列複雜的流程,Clang幫助我們整合了這個過程。注意:本例是C++程序,鏈接時需要C++標準庫,若出現未定義符號的鏈接錯誤,可改用 clang++,或在命令末尾添加 -lstdc++。
clang hello_opt.ll -o hello