Planet Clang

by Aaron Ballman at December 29, 2023 02:52 PM · deterministic

unsigned int i = UINT_MAX;
signed int j = i; // Implementation-defined behavior
signed int i = INT_MAX + 1; // Undefined behavior
if (something) {
  [[likely]];
  [[unlikely]];
  foo(something);
}
#define MY_COOL_ASSERT(x) [[unlikely]] assert(x)

if (something) {
  [[likely]];
  MY_COOL_ASSERT(something > 0);
  foo(something);
}
if (foo) [[likely]] {
  do_something(foo);
}

while (bar) [[unlikely]] {
  ;
}

switch (baz) {
[[likely]] case 0: whatever(); break;
[[unlikely]] case 1: something_else(); break;
default: break;
}
if (foo) [[likely]] { // A
  baz();
} else if (bar) [[likely]] { // B
  quux();
} else [[unlikely]] { // C
  bing();
}
if (foo) [[likely]] { // A
  baz();
} else [[likely]] if (bar) [[likely]] { // B
  quux();
} else [[unlikely]] { // C
  bing();
}
switch (foo) {
case 0:
[[likely]] case 1:
[[unlikely]] case 2: bar(); break;
[[likely]] case 3:
default: baz(); break;
if (foo) { // is this branch likely or unlikely?
  switch (*foo) {
  [[likely]] case 0: bar(); break;
  [[unlikely]] case 1: baz(); break;
  [[likely]] default: quux(); break;
  }
} else {
  ...
}
if (foo) { // Is this path likely?
  {
    [[likely]];
    SomeRAIIObject Obj;
    Obj.whatever(foo);
  }
} else {
  ...
}
if (ch == ' ') [[likely]] {
  goto whitespace;  // A
} else if (ch == '\n' || ch == '\t') [[unlikely]] {
  goto whitespace;  // B
} else {
  foo();
}
[[likely]] whitespace: bar();  // C
try {
  foo();
} catch (...) [[likely]] {
  dont_kill_people();
}

    .c --clang--> .bc

    .c --clang--> .bc

    .rs --+
          |
    .rs --+--rustc--> .bc
          |
    .rs --+

    .c --clang--> .bc --LLVM--> .bc (opt)

    .c --clang--> .bc --LLVM--> .bc (opt)

    .rs --+
          |
    .rs --+--rustc--> .bc --LLVM--> .bc (opt)
          |
    .rs --+

    .c --clang--> .bc --LLVM--> .bc (opt) --LLVM--> .o

    .c --clang--> .bc --LLVM--> .bc (opt) --LLVM--> .o

    .rs --+
          |
    .rs --+--rustc--> .bc --LLVM--> .bc (opt) --LLVM--> .o
          |
    .rs --+

    .c --clang--> .bc --LLVM--> .bc (opt) --LLVM--> .o ------+
                                                             |
    .c --clang--> .bc --LLVM--> .bc (opt) --LLVM--> .o ------+
                                                             |
                                                             +--ld--> bin
    .rs --+                                                  |
          |                                                  |
    .rs --+--rustc--> .bc --LLVM--> .bc (opt) --LLVM--> .o --+
          |
    .rs --+

    .c --clang--> .bc --LLVM--> .bc (opt) ------------------+ - - +
                                                            |     |
    .c --clang--> .bc --LLVM--> .bc (opt) ------------------+ - - +
                                                            |     |
                                                            +-ld+LLVM--> bin
    .rs --+                                                 |
          |                                                 |
    .rs --+--rustc--> .bc --LLVM--> .bc (opt) --LLVM--> .o -+
          |
    .rs --+

    .c --clang--> .bc --LLVM--> .bc (opt) ---------+
                                                   |
    .c --clang--> .bc --LLVM--> .bc (opt) ---------+
                                                   |
                                                   +-ld+LLVM--> bin
    .rs --+                                        |
          |                                        |
    .rs --+--rustc--> .bc --LLVM--> .bc (opt) -----+
          |
    .rs --+

    # Compile the Rust static library, called "xyz"
    rustc --crate-type=staticlib -O -C linker-plugin-lto -o libxyz.a lib.rs

    # Compile the C code with "-flto"
    clang -flto -c -O2 main.c

    # Link everything
    clang -flto -O2 main.o -L . -lxyz

struct __declspec(dllexport) S {
  void foo() {}
};

struct __declspec(dllimport) S {
  void foo() {}
};

void internal();

struct __declspec(dllimport) S {
  void foo() { internal(); }
}

inline int internal() { static int x; return x++; }

struct __declspec(dllimport) S {
  int foo() { return internal(); }
}

import pydffi
CU = pydffi.FFI().cdef("int puts(const char* s);");
CU.funcs.puts("hello world!")

import pydffi
pydffi.dlopen("/path/to/libarchive.so")
CU = pydffi.FFI().cdef("#include <archive.h>")
a = funcs.archive_read_new()
assert a
...

$ pip install pydffi

libffi
cffi
PyCParser
libffi
PyCParser
lldb
eval()
lldb
typedef struct {
  short a;
  int b;
} A;

void print_A(A s) {
  printf("%d %d\n", s.a, s.b);
}

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

@.str = private unnamed_addr constant [7 x i8] c"%d %d\0A\00", align 1

define void @print_A(i64) local_unnamed_addr {
  %2 = trunc i64 %0 to i32
  %3 = lshr i64 %0, 32
  %4 = trunc i64 %3 to i32
  %5 = shl i32 %2, 16
  %6 = ashr exact i32 %5, 16
  %7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), i32 %6, i32 %4)
  ret void
}

llvm::ArrayRef
[...]
|-RecordDecl 0x5561d7f9fc20 <a.c:1:9, line:4:1> line:1:9 struct definition
| |-FieldDecl 0x5561d7ff4750 <line:2:3, col:9> col:9 referenced a 'short'
| `-FieldDecl 0x5561d7ff47b0 <line:3:3, col:7> col:7 referenced b 'int'

target triple = "x86_64-pc-linux-gnu"
%struct.A = type { i16, i32 }
@.str = private unnamed_addr constant [7 x i8] c"%d %d\0A\00", align 1

define void @print_A(i64) local_unnamed_addr !dbg !7 {
  %2 = trunc i64 %0 to i32
  %3 = lshr i64 %0, 32
  %4 = trunc i64 %3 to i32
  tail call void @llvm.dbg.value(metadata i32 %4, i64 0, metadata !18, metadata !19), !dbg !20
  tail call void @llvm.dbg.declare(metadata %struct.A* undef, metadata !18, metadata !21), !dbg !20
  %5 = shl i32 %2, 16, !dbg !22
  %6 = ashr exact i32 %5, 16, !dbg !22
  %7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([...] @.str, i64 0, i64 0), i32 %6, i32 %4), !dbg !23
  ret void, !dbg !24
}

[...]
; DISubprogram defines (in our case) a C function, with its full type
!7 = distinct !DISubprogram(name: "print_A", scope: !1, file: !1, line: 6, type: !8, [...], variables: !17)
; This defines the type of our subprogram
!8 = !DISubroutineType(types: !9)
; We have the "original" types used for print_A, with the first one being the
; return type (null => void), and the other ones the arguments (in !10)
!9 = !{null, !10}
!10 = !DIDerivedType(tag: DW_TAG_typedef, name: "A", file: !1, line: 4, baseType: !11)
; This defines our structure, with its various fields
!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !1, line: 1, size: 64, elements: !12)
!12 = !{!13, !15}
; We have here the size and name of the member "a". Offset is 0 (default value)
!13 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !11, file: !1, line: 2, baseType: !14, size: 16)
!14 = !DIBasicType(name: "short", size: 16, encoding: DW_ATE_signed)
; We have here the size, offset and name of the member "b"
!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !11, file: !1, line: 3, baseType: !16, size: 32, offset: 32)
!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
[...]

DFFI FFI([...]);
// This will declare puts as a function that returns int and takes a const

// char* as an argument. We could also create this function type by hand.
CompilationUnit CU = FFI.cdef("int puts(const char* s);", [...]);
NativeFunc F = CU.getFunction("puts");
const char* s = "hello world!";
void* Args[] = {&s};
int Ret;
F.call(&Ret, Args);

void*
void*
void*
puts

void call_puts(void* Ret, void** Args) {
  *((int*)Ret) = puts((const char*) Args[0]);
}

typedef void(*puts_call_ty)(void*, void**);
puts_call_ty Wrapper = /* pointer to the compiled wrapper function */;
Wrapper(Ret, Args);

libffi
puts
void __dffi_wrapper_0(int32_t( __attribute__((cdecl)) *__FPtr)(char *), int32_t *__Ret, void** __Args) {
  *__Ret = (__FPtr)(*((char **)__Args[0]));
}

DFFI::cdef
DFFI::compile
CompilationUnit::getFunction
DFFI::cdef
-g -femit-all-decls
typedef struct {
  short a;
  int b;
} A;

void print_A(A s);

$ clang -S -emit-llvm -g -femit-all-decls -o - a.c |grep print_A |wc -l
0

print_A

void __dffi_force_decl_print_A(A s) { }

__dffi_force_decl_print_A
DFFI::compile
TODO
libarchive
libffi
libffi
void*
DFFI::cdef
DFFI::cdef
DFFI::compile
libffi
cdef
compile
#dragonffi

Stable	6.0
Qualification	7
Development	8

	mini_installer.exe	chrome.dll	chrome_child.dll	chrome.exe
32-bit win-pgo	45.46 MB	36.47 MB	53.76 MB	1.38 MB
32-bit win-clang	45.65 MB (+0.04%)	42.56 MB (+16.7%)	62.38 MB (+16%)	1.45 MB (+5.1%)
64-bit win-pgo	49.4 MB	53.3 MB	65.6 MB	1.6 MB
64-bit win-clang	46.27 MB (-6.33%)	50.6 MB (-5.1%)	72.71 MB (+10.8%)	1.57 MB (-1.2%)

Hash Size (Bytes)	Average # of records needed for a collision
4	82,137
6	21,027,121
8	5,382,943,231
12	3.53 x 10¹⁴
16	2.31 x 10¹⁹
20	1.52 x 10²⁴

Toolchain	Mode	Target
Toolchain	Mode	blink_core.dll	content.dll	chrome.dll	clang.exe
MSVC	/DEBUG:FULL	553.11s	205.45s	507.17s	62.45s
MSVC	/DEBUG:FASTLINK	116.77s	56.05s	67.80s	29.37s
lld-link	/DEBUG:FULL	121.17s	42.10s	42.31s	24.14s
lld-link	/DEBUG:GHASH	88.71s	33.30s	34.76s	17.99s

Version	Build failures	Ignoring haskell pkgs
3.8		1367 / 5.6%
3.9	2274 / 8.1%	1618 / 5.8%
4.0	2311 / 8.3%	1655 / 5.9%
5.0		1445 / 5.1%

Planet Clang

December 29, 2023

Existing code is important, existing implementations are not. A standard is a treaty between implementor and programmer. Migration of an existing code base is an issue. Minimize incompatibilities with C90.

C code can be portable. C code can be non-portable.

Avoid quiet changes.

Keep the spirit of C.

Support international programming.

Codify existing practice to address evident deficiencies. Unlike for C99, the consensus at the London meeting was that there should be no invention, without exception.

Minimize incompatibilities with C++

Maintain conceptual simplicity.

Trust the programmer, as a goal, is outdated in respect to the security and safety programming communities.

Application Programming Interfaces (APIs) should be self-documenting when possible.

What’s Missing?

Closing Thoughts

October 01, 2023

Errata

August 27, 2020

June 02, 2020

TLDR

Qmake

Symbol differences

Next steps

For maintainers & upstream

Conclusion

April 22, 2020

Motivation

Using the _ExtInt Language Feature

History

Future Extensions

Conclusion

March 22, 2020

Results

Errors found

Current status

Next steps

Results

Errors found

Current status

Next steps

November 07, 2019

What is a deterministic build, and its advantages

Plan of attack

Getting to basic determinism

Getting to incremental determinism

Getting to local determinism

Getting to universal determinism

Summary

September 19, 2019

Background - A bird's eye view of the LLVM compilation pipeline

Link time optimization in LLVM

Cross-language link time optimization

Using cross-language LTO: a minimal example

Conclusion

Acknowledgments

September 04, 2019

August 01, 2019

The LLVM Project is Moving to GitHub

May 24, 2019

March 18, 2019

March 15, 2019

March 07, 2019

November 14, 2018

Background

/Zc:dllexportInlines-

Compatibility

Summary

More information

Acknowledgements

September 26, 2018

September 25, 2018

September 18, 2018

August 24, 2018

April 10, 2018

Keynotes

Tutorials

Talks

BoFs

Student Research Competition

Lightning Talks

Posters

Existing code is important, existing implementations are not.
A standard is a treaty between implementor and programmer.
Migration of an existing code base is an issue.
Minimize incompatibilities with C90.

C code can be portable.
C code can be non-portable.

Codify existing practice to address evident deficiencies.
Unlike for C99, the consensus at the London meeting was that there should be no invention, without exception.