# Nightfall Tempest Trials
## Overview
The challenge provides:
pk: Kyber public key (K=12)ct,iv: AES-256-CBC ciphertext and IVleaks: per-polynomial NTT internal state after each stage, with bounded noise
Goal: recover the secret key from the noisy NTT traces, derive the AES key, and decrypt the flag.
## Key Observations
- The leak is captured inside
ntt()after each stage (len = 128, 64, ..., 2), so each polynomial leak is 7 * 256 values. - The secret coefficients are small (CBD with eta=2), so the NTT input is in [-2, 2].
- Kyber’s
poly_ntt()callsntt()and thenpoly_reduce(). The leak is before the reduction, but the secret key bytes are produced after reduction. This is the critical detail for correct packing. - The NTT butterfly equations are linear over integers with the
fqmulMontgomery multiplication; we can model these as constraints and solve exactly.
## Approach
### 1) Parse the leak
Each of the 12 polynomials has 7 stages. For stage s the allowed range is:
leak[s][i] - DELTA[s] <= r_s[i] <= leak[s][i] + DELTA[s]
with DELTA = [240, 430, 600, 75, 70, 88, 99].
### 2) CP-SAT model per polynomial
For each polynomial, build a CP-SAT model (OR-Tools) with:
- input coefficients
x[i]in [-2, 2] - stage outputs
r_s[i]within noisy bounds - butterfly constraints for each stage
For each butterfly:
t = fqmul(zeta, b)
u + v = 2 * a
u - v = 2 * t
t = fqmul(zeta, b) is enforced via AddAllowedAssignments([b, t], pairs) over the small integer domain.
This yields all valid solutions for the final stage r6 (NTT output before reduction).
### 3) Reduce before packing
Kyber uses:
poly_ntt(r) = ntt(r); poly_reduce(r);
So we must apply poly_reduce() to each recovered r6 before poly_tobytes().
After reduction, deduplicate candidates by their reduced coefficient vectors.
### 4) Optionally, filter with public key
Regenerate matrix A from the seed in pk and check:
e = pk - A * sk
in the NTT domain, then invntt(e) should match the allowed CBD range after scaling (the usual Kyber scale = 2285 trick).
This prunes combinations but is not strictly required because the candidate counts are already small.
### 5) Derive key and decrypt
sk_bytes = concat(poly_tobytes(reduced_r6[i]) for i in 0..K-1)
key = sha256(sk_bytes)
pt = AES-256-CBC-Decrypt(ct, key, iv)
Check PKCS#7 padding and printable text.
## Result
Recovered plaintext:
0ctf{7n_the_Twilight_0f_br0k3n_ReAlm5_mY_deSt1ny_rise5_froM_7he_v0id.}
import ast
import hashlib
import itertools
import re
import subprocess
from hashlib import sha256
from ortools.sat.python import cp_model
Q = 3329
QINV = -3327
ZETAS = [
-1044, -758, -359, -1517, 1493, 1422, 287, 202,
-171, 622, 1577, 182, 962, -1202, -1474, 1468,
573, -1325, 264, 383, -829, 1458, -1602, -130,
-681, 1017, 732, 608, -1542, 411, -205, -1571,
1223, 652, -552, 1015, -1293, 1491, -282, -1544,
516, -8, -320, -666, -1618, -1162, 126, 1469,
-853, -90, -271, 830, 107, -1421, -247, -951,
-398, 961, -1508, -725, 448, -1065, 677, -1275,
-1103, 430, 555, 843, -1251, 871, 1550, 105,
422, 587, 177, -235, -291, -460, 1574, 1653,
-246, 778, 1159, -147, -777, 1483, -602, 1119,
-1590, 644, -872, 349, 418, 329, -156, -75,
817, 1097, 603, 610, 1322, -1285, -1465, 384,
-1215, -136, 1218, -1335, -874, 220, -1187, -1659,
-1185, -1530, -1278, 794, -1510, -854, -870, 478,
-108, -308, 996, 991, 958, -1460, 1522, 1628,
]
DELTA = [240, 430, 600, 75, 70, 88, 99]
K = 12
def int16(x):
x &= 0xFFFF
return x - 0x10000 if x & 0x8000 else x
def int32(x):
x &= 0xFFFFFFFF
return x - 0x100000000 if x & 0x8000 else x
def montgomery_reduce(a):
a = int32(a)
t = int16(int16(a) * QINV)
t = int16((a - int32(t) * Q) >> 16)
return t
def fqmul(a, b):
return montgomery_reduce(int32(a) * int32(b))
def parse_output(path):
text = open(path, "r", encoding="utf-8").read()
pk = re.search(r"pk\s*=\s*([0-9a-f]+)", text).group(1)
ct = re.search(r"ct\s*=\s*([0-9a-f]+)", text).group(1)
iv = re.search(r"iv\s*=\s*([0-9a-f]+)", text).group(1)
leaks_match = re.search(r"leaks\s*=\s*(\[.*\])\s*ct\s*=", text, flags=re.DOTALL)
leaks = ast.literal_eval(leaks_match.group(1))
return pk, leaks, ct, iv
def poly_tobytes(coeffs):
out = bytearray(384)
for i in range(128):
t0 = int16(coeffs[2 * i])
t0 += ((t0 >> 15) & Q)
t1 = int16(coeffs[2 * i + 1])
t1 += ((t1 >> 15) & Q)
out[3 * i + 0] = t0 & 0xFF
out[3 * i + 1] = ((t0 >> 8) | ((t1 & 0xFF) << 4)) & 0xFF
out[3 * i + 2] = (t1 >> 4) & 0xFF
return bytes(out)
def solve_poly_all(leak, max_solutions=None):
model = cp_model.CpModel()
x = [model.NewIntVar(-2, 2, f"x_{i}") for i in range(256)]
r = [[None] * 256 for _ in range(7)]
r_bounds = [[None] * 256 for _ in range(7)]
for s in range(7):
delta = DELTA[s]
for j in range(256):
lo = leak[s * 256 + j] - delta
hi = leak[s * 256 + j] + delta
if lo < -32768:
lo = -32768
if hi > 32767:
hi = 32767
r_bounds[s][j] = (lo, hi)
r[s][j] = model.NewIntVar(lo, hi, f"r_{s}_{j}")
for s in range(7):
len_s = 128 >> s
num_blocks = 256 // (2 * len_s)
k_base = 2 ** s
for block in range(num_blocks):
zeta = ZETAS[k_base + block]
start = block * 2 * len_s
for j in range(start, start + len_s):
if s == 0:
a = x[j]
b = x[j + len_s]
b_lo, b_hi = -2, 2
else:
a = r[s - 1][j]
b = r[s - 1][j + len_s]
b_lo, b_hi = r_bounds[s - 1][j + len_s]
u = r[s][j]
v = r[s][j + len_s]
pairs = [(bv, fqmul(zeta, bv)) for bv in range(b_lo, b_hi + 1)]
t_vals = [t for _, t in pairs]
t_lo = min(t_vals)
t_hi = max(t_vals)
t = model.NewIntVar(t_lo, t_hi, f"t_{s}_{j}")
model.AddAllowedAssignments([b, t], pairs)
model.Add(u + v == 2 * a)
model.Add(u - v == 2 * t)
class Collector(cp_model.CpSolverSolutionCallback):
def __init__(self, r6, limit=None):
super().__init__()
self.r6 = r6
self.limit = limit
self.solutions = []
def on_solution_callback(self):
self.solutions.append([self.Value(self.r6[i]) for i in range(256)])
if self.limit is not None and len(self.solutions) >= self.limit:
self.StopSearch()
solver = cp_model.CpSolver()
solver.parameters.enumerate_all_solutions = True
solver.parameters.num_search_workers = 1
collector = Collector(r[6], limit=max_solutions)
res = solver.Solve(model, collector)
if res != cp_model.OPTIMAL:
print(f"warning: enumeration incomplete ({solver.StatusName(res)})")
if not collector.solutions:
raise RuntimeError("no solutions found")
return collector.solutions
def poly_frombytes(a):
coeffs = [0] * 256
for i in range(128):
coeffs[2 * i] = ((a[3 * i + 0] >> 0) | (a[3 * i + 1] << 8)) & 0xFFF
coeffs[2 * i + 1] = ((a[3 * i + 1] >> 4) | (a[3 * i + 2] << 4)) & 0xFFF
return [int16(x) for x in coeffs]
def barrett_reduce(a):
a = int16(a)
v = ((1 << 26) + Q // 2) // Q
t = (int32(v) * a + (1 << 25)) >> 26
t = int16(t * Q)
return int16(a - t)
def poly_reduce(a):
return [barrett_reduce(x) for x in a]
def poly_add(a, b):
return [int16(x + y) for x, y in zip(a, b)]
def poly_sub(a, b):
return [int16(x - y) for x, y in zip(a, b)]
def poly_tomont(a):
f = (1 << 32) % Q
return [montgomery_reduce(int32(x) * f) for x in a]
def basemul(a0, a1, b0, b1, zeta):
r0 = fqmul(a1, b1)
r0 = fqmul(r0, zeta)
r0 = int16(r0 + fqmul(a0, b0))
r1 = fqmul(a0, b1)
r1 = int16(r1 + fqmul(a1, b0))
return r0, r1
def poly_basemul_montgomery(a, b):
r = [0] * 256
for i in range(64):
r0, r1 = basemul(
a[4 * i + 0],
a[4 * i + 1],
b[4 * i + 0],
b[4 * i + 1],
ZETAS[64 + i],
)
r2, r3 = basemul(
a[4 * i + 2],
a[4 * i + 3],
b[4 * i + 2],
b[4 * i + 3],
-ZETAS[64 + i],
)
r[4 * i + 0] = r0
r[4 * i + 1] = r1
r[4 * i + 2] = r2
r[4 * i + 3] = r3
return r
def polyvec_basemul_acc_montgomery(a_vec, b_vec):
r = poly_basemul_montgomery(a_vec[0], b_vec[0])
for i in range(1, K):
t = poly_basemul_montgomery(a_vec[i], b_vec[i])
r = poly_add(r, t)
return poly_reduce(r)
def invntt(r):
r = [int16(x) for x in r]
k = 127
length = 2
while length <= 128:
for start in range(0, 256, 2 * length):
zeta = ZETAS[k]
k -= 1
for j in range(start, start + length):
t = r[j]
r[j] = barrett_reduce(int16(t + r[j + length]))
r[j + length] = int16(r[j + length] - t)
r[j + length] = fqmul(zeta, r[j + length])
length <<= 1
f = 1441
for j in range(256):
r[j] = fqmul(r[j], f)
return r
class ShakeStream:
def __init__(self, seed):
self.seed = seed
self.buf = b""
self.off = 0
def _ensure(self, n):
need = self.off + n
if need <= len(self.buf):
return
new_len = max(need, len(self.buf) * 2, 512)
self.buf = hashlib.shake_128(self.seed).digest(new_len)
def take(self, n):
self._ensure(n)
out = self.buf[self.off : self.off + n]
self.off += n
return out
def gen_matrix(seed, transposed):
a = []
for i in range(K):
row = []
for j in range(K):
if transposed:
x, y = i, j
else:
x, y = j, i
stream = ShakeStream(seed + bytes([x, y]))
coeffs = []
while len(coeffs) < 256:
b0, b1, b2 = stream.take(3)
val0 = ((b0 >> 0) | (b1 << 8)) & 0xFFF
val1 = ((b1 >> 4) | (b2 << 4)) & 0xFFF
if val0 < Q:
coeffs.append(int16(val0))
if len(coeffs) < 256 and val1 < Q:
coeffs.append(int16(val1))
row.append(coeffs)
a.append(row)
return a
def iter_candidate_indices(candidates, pkpv, seed):
allowed = set()
scale = 2285
for x in range(-2, 3):
v = (x * scale) % Q
if v > Q // 2:
v -= Q
allowed.add(int16(v))
a = gen_matrix(seed, transposed=False)
prod = [[[] for _ in range(K)] for _ in range(K)]
for i in range(K):
for j in range(K):
for cand in candidates[j]:
prod[i][j].append(poly_basemul_montgomery(a[i][j], cand))
ranges = [range(len(candidates[j])) for j in range(K)]
for idxs in itertools.product(*ranges):
ok = True
for i in range(K):
acc = prod[i][0][idxs[0]][:]
for j in range(1, K):
acc = poly_add(acc, prod[i][j][idxs[j]])
acc = poly_reduce(acc)
acc = poly_tomont(acc)
e_ntt = poly_sub(pkpv[i], acc)
e_ntt = poly_reduce(e_ntt)
e_scaled = invntt(e_ntt)
if any(x not in allowed for x in e_scaled):
ok = False
break
if ok:
yield idxs
def main():
pk_hex, leaks, ct_hex, iv_hex = parse_output("output.txt")
candidates = []
for idx in range(K):
sols = solve_poly_all(leaks[idx])
reduced = []
seen = set()
for sol in sols:
red = poly_reduce(sol)
key = tuple(red)
if key in seen:
continue
seen.add(key)
reduced.append(red)
candidates.append(reduced)
if len(reduced) == len(sols):
print(f"poly {idx + 1}/{K}: {len(reduced)} candidates")
else:
print(f"poly {idx + 1}/{K}: {len(sols)} raw -> {len(reduced)} reduced candidates")
pk = bytes.fromhex(pk_hex)
pkpv = []
for i in range(K):
start = i * 384
pkpv.append(poly_frombytes(pk[start : start + 384]))
seed = pk[K * 384 : K * 384 + 32]
ct = bytes.fromhex(ct_hex)
iv = bytes.fromhex(iv_hex)
def looks_like_flag(data):
try:
text = data.decode("utf-8")
except UnicodeDecodeError:
return False
if not text:
return False
if any((ord(ch) < 32 or ord(ch) > 126) and ch not in "\r\n\t" for ch in text):
return False
if "{" in text and "}" in text:
return True
return "ctf" in text.lower()
def try_indices(indices_iter):
for idxs in indices_iter:
sk_parts = [poly_tobytes(candidates[i][idxs[i]]) for i in range(K)]
sk_bytes = b"".join(sk_parts)
key = sha256(sk_bytes).digest()
proc = subprocess.run(
["openssl", "enc", "-aes-256-cbc", "-d", "-K", key.hex(), "-iv", iv.hex()],
input=ct,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
if proc.returncode == 0 and looks_like_flag(proc.stdout):
return proc.stdout
return None
flag = try_indices(iter_candidate_indices(candidates, pkpv, seed))
if flag is None:
ranges = [range(len(candidates[i])) for i in range(K)]
flag = try_indices(itertools.product(*ranges))
if flag is None:
raise RuntimeError("no valid flag found")
print(flag.decode("utf-8", errors="replace"))
if __name__ == "__main__":
main()Comments(0)
No comments yet. Be the first to share your thoughts!