Appendix C: C++ Competitive Programming Tricks
This appendix collects the most useful C++ tricks, macros, templates, and code snippets that competitive programmers use daily. These techniques can save significant time in contests and help your code run faster.
C.1 Fast I/O
The most important performance optimization for I/O-heavy problems:
// Always include these at the start of main()
ios_base::sync_with_stdio(false); // disconnect C and C++ I/O streams
cin.tie(NULL); // untie cin from cout
// Why they help:
// sync_with_stdio(false): by default, C++ syncs with C I/O (printf/scanf)
// for compatibility. Turning this off makes cin/cout much faster.
// cin.tie(NULL): by default, cin flushes cout before each read.
// Untying eliminates this unnecessary flush.
File I/O (USACO traditional problems):
freopen("problem.in", "r", stdin); // redirect cin to file (replace "problem" with actual name)
freopen("problem.out", "w", stdout); // redirect cout to file
// After these lines, cin/cout work as normal but read/write files
// Example: for "Blocked Billboard", use "billboard.in" / "billboard.out"
Even faster: manual reading with getchar_unlocked (Linux):
inline int readInt() {
int x = 0; bool neg = false;
char c = getchar_unlocked();
while (c != '-' && (c < '0' || c > '9')) c = getchar_unlocked();
if (c == '-') { neg = true; c = getchar_unlocked(); }
while (c >= '0' && c <= '9') { x = x*10 + c-'0'; c = getchar_unlocked(); }
return neg ? -x : x;
}
// Typically 3-5× faster than cin for large integer inputs
C.2 Common Macros and Typedefs
// Shorter type names
typedef long long ll;
typedef unsigned long long ull;
typedef long double ld;
typedef pair<int,int> pii;
typedef pair<ll,ll> pll;
typedef vector<int> vi;
typedef vector<ll> vll;
typedef vector<pii> vpii;
// Shorthand operations
#define pb push_back
#define pf push_front
#define all(v) (v).begin(), (v).end()
#define rall(v) (v).rbegin(), (v).rend()
#define sz(v) ((int)(v).size())
#define fi first
#define se second
// Loop macros (use sparingly — can hurt readability)
#define FOR(i, a, b) for(int i = (a); i < (b); i++)
#define REP(i, n) FOR(i, 0, n)
// Min/max shortcuts
#define chmin(a, b) a = min(a, b)
#define chmax(a, b) a = max(a, b)
// Usage examples:
// vi v; v.pb(5); → v.push_back(5)
// sort(all(v)); → sort(v.begin(), v.end())
// cout << sz(v) << "\n";→ cout << (int)v.size() << "\n"
// FOR(i, 1, n+1) { ... }→ for(int i = 1; i < n+1; i++) { ... }
C.3 GCC Pragmas for Speed
// These pragmas can give 2-4× speedup on GCC compilers (used on USACO judges)
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2,bmi,bmi2,popcnt")
// Place these BEFORE #include lines
// Warning: "O3" and "avx2" may cause subtle numerical differences
// (usually fine for integer problems, be careful with floating point)
// Safer version (just O2 without vector instructions):
#pragma GCC optimize("O2")
// Full competitive template with pragmas:
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2")
#include <bits/stdc++.h>
using namespace std;
// ... rest of your code
C.4 Useful Math: GCD, LCM, Modular Arithmetic
#include <bits/stdc++.h>
using namespace std;
// ─── GCD and LCM ───────────────────────────────────────────────────────────
// C++17: std::gcd and std::lcm from <numeric>
#include <numeric>
int g = gcd(12, 8); // 4
int l = lcm(4, 6); // 12
// C++14 and earlier: __gcd from <algorithm>
int g2 = __gcd(12, 8); // 4
long long l2 = 4LL / __gcd(4, 6) * 6; // 12 (careful: divide first to avoid overflow)
// Custom GCD (Euclidean algorithm):
ll mygcd(ll a, ll b) { return b ? mygcd(b, a%b) : a; }
ll mylcm(ll a, ll b) { return a / mygcd(a,b) * b; } // divide first!
// ─── Modular Arithmetic ─────────────────────────────────────────────────────
const ll MOD = 1e9 + 7; // standard USACO/Codeforces modulus
// Add: (a + b) % MOD
ll addmod(ll a, ll b) { return (a + b) % MOD; }
// Subtract: (a - b + MOD) % MOD ← always add MOD before % to avoid negatives
ll submod(ll a, ll b) { return (a - b + MOD) % MOD; }
// Multiply: (a * b) % MOD
ll mulmod(ll a, ll b) { return (a % MOD) * (b % MOD) % MOD; }
// Power: a^b mod MOD using fast exponentiation — O(log b)
ll power(ll base, ll exp, ll mod = MOD) {
ll result = 1;
base %= mod;
while (exp > 0) {
if (exp & 1) result = result * base % mod; // odd exponent
base = base * base % mod; // square
exp >>= 1; // halve exponent
}
return result;
}
// Modular inverse (a^{-1} mod p, where p is prime):
ll modinv(ll a, ll mod = MOD) { return power(a, mod-2, mod); }
// This uses Fermat's little theorem: a^{p-1} ≡ 1 (mod p) for prime p
// So a^{-1} ≡ a^{p-2} (mod p)
// Modular division: (a / b) mod p = (a * b^{-1}) mod p
ll divmod(ll a, ll b) { return mulmod(a, modinv(b)); }
// Example: C(n, k) mod p using precomputed factorials
const int MAXN = 200001;
ll fact[MAXN], inv_fact[MAXN];
void precompute_factorials() {
fact[0] = 1;
for (int i = 1; i < MAXN; i++) fact[i] = fact[i-1] * i % MOD;
inv_fact[MAXN-1] = modinv(fact[MAXN-1]);
for (int i = MAXN-2; i >= 0; i--) inv_fact[i] = inv_fact[i+1] * (i+1) % MOD;
}
ll C(int n, int k) {
if (k < 0 || k > n) return 0;
return fact[n] * inv_fact[k] % MOD * inv_fact[n-k] % MOD;
}
C.5 Useful Code Snippets
Disjoint Set Union (DSU / Union-Find) Template
// DSU — complete template with size tracking
struct DSU {
vector<int> parent, sz;
DSU(int n) : parent(n+1), sz(n+1, 1) {
iota(parent.begin(), parent.end(), 0); // parent[i] = i
}
int find(int x) {
if (parent[x] != x) parent[x] = find(parent[x]); // path compression
return parent[x];
}
bool unite(int x, int y) {
x = find(x); y = find(y);
if (x == y) return false; // already same component
if (sz[x] < sz[y]) swap(x, y); // union by size
parent[y] = x;
sz[x] += sz[y];
return true; // successfully merged
}
bool connected(int x, int y) { return find(x) == find(y); }
int size(int x) { return sz[find(x)]; } // size of x's component
};
// Usage:
DSU dsu(n);
dsu.unite(1, 2);
cout << dsu.connected(1, 3) << "\n"; // 0 (false)
cout << dsu.size(1) << "\n"; // 2
Segment Tree (Point Update, Range Query)
// Segment Tree — supports:
// point_update(i, val): set position i to val
// query(l, r): sum of [l, r]
// All operations O(log N)
struct SegTree {
int n;
vector<ll> tree;
SegTree(int n) : n(n), tree(4*n, 0) {}
void update(int node, int start, int end, int idx, ll val) {
if (start == end) {
tree[node] = val;
return;
}
int mid = (start + end) / 2;
if (idx <= mid) update(2*node, start, mid, idx, val);
else update(2*node+1, mid+1, end, idx, val);
tree[node] = tree[2*node] + tree[2*node+1]; // merge
}
ll query(int node, int start, int end, int l, int r) {
if (r < start || end < l) return 0; // out of range
if (l <= start && end <= r) return tree[node]; // fully in range
int mid = (start + end) / 2;
return query(2*node, start, mid, l, r)
+ query(2*node+1, mid+1, end, l, r);
}
void update(int i, ll val) { update(1, 1, n, i, val); }
ll query(int l, int r) { return query(1, 1, n, l, r); }
};
// Usage:
SegTree st(n);
st.update(3, 10); // set position 3 to 10
cout << st.query(1, 5); // sum of positions 1..5
BFS Template
// Grid BFS — shortest path in unweighted grid
int bfs_grid(vector<string>& grid, int sr, int sc, int er, int ec) {
int R = grid.size(), C = grid[0].size();
vector<vector<int>> dist(R, vector<int>(C, -1));
queue<pair<int,int>> q;
int dr[] = {-1, 1, 0, 0};
int dc[] = {0, 0, -1, 1};
dist[sr][sc] = 0;
q.push({sr, sc});
while (!q.empty()) {
auto [r, c] = q.front(); q.pop();
for (int d = 0; d < 4; d++) {
int nr = r + dr[d], nc = c + dc[d];
if (nr >= 0 && nr < R && nc >= 0 && nc < C
&& grid[nr][nc] != '#' && dist[nr][nc] == -1) {
dist[nr][nc] = dist[r][c] + 1;
q.push({nr, nc});
}
}
}
return dist[er][ec];
}
Binary Search on Answer Template
// Binary search on answer — maximize X such that check(X) is true
// Precondition: check is monotone (false...false...true...true)
template<typename T, typename F>
T binary_search_ans(T lo, T hi, F check) {
T ans = lo; // or -1 if no valid answer
while (lo <= hi) {
T mid = lo + (hi - lo) / 2;
if (check(mid)) { ans = mid; lo = mid + 1; }
else { hi = mid - 1; }
}
return ans;
}
// Usage example: find max D such that canPlace(D) is true
int result = binary_search_ans(1, maxDist, canPlace);
C.6 Built-in Functions Worth Knowing
// ─── Integer operations ─────────────────────────────────────────────────────
__builtin_popcount(x) // count set bits in x (int)
__builtin_popcountll(x) // count set bits in x (long long)
__builtin_clz(x) // count leading zeros (int, x > 0)
__builtin_ctz(x) // count trailing zeros (int, x > 0)
// Examples:
__builtin_popcount(0b1011) == 3 // three 1-bits
__builtin_ctz(0b1000) == 3 // three trailing zeros
__builtin_clz(1) == 31 // 31 leading zeros (for 32-bit int)
(31 - __builtin_clz(x)) // floor(log2(x))
// ─── Bit tricks ─────────────────────────────────────────────────────────────
// Check if x is a power of 2:
bool isPow2 = (x > 0) && !(x & (x-1));
// Extract lowest set bit:
int lsb = x & (-x);
// Turn off lowest set bit:
x = x & (x-1);
// Iterate all subsets of a bitmask (for bitmask DP):
for (int sub = mask; sub > 0; sub = (sub-1) & mask) {
// process subset 'sub' of 'mask'
}
// ─── Useful STL functions ────────────────────────────────────────────────────
// next_permutation: iterate all permutations
sort(v.begin(), v.end()); // start from sorted order
do {
// v is current permutation
} while (next_permutation(v.begin(), v.end()));
// __gcd: greatest common divisor (available before C++17)
int g = __gcd(a, b);
// std::gcd, std::lcm (C++17 <numeric>):
#include <numeric>
int g = gcd(a, b);
int l = lcm(a, b);
C.7 The Full Competition Template
// ────────────────────────────────────────────────────────────────────────────
// Competitive Programming Template — C++17
// ────────────────────────────────────────────────────────────────────────────
#pragma GCC optimize("O2")
#include <bits/stdc++.h>
using namespace std;
// Type aliases
typedef long long ll;
typedef pair<int,int> pii;
typedef vector<int> vi;
// Convenience macros
#define pb push_back
#define all(v) (v).begin(), (v).end()
#define sz(v) ((int)(v).size())
#define fi first
#define se second
// Constants
const ll MOD = 1e9 + 7;
const ll INF = 1e18;
const int MAXN = 200005;
// Fast power mod
ll power(ll base, ll exp, ll mod = MOD) {
ll res = 1; base %= mod;
for (; exp > 0; exp >>= 1) {
if (exp & 1) res = res * base % mod;
base = base * base % mod;
}
return res;
}
int main() {
ios_base::sync_with_stdio(false);
cin.tie(NULL);
// Uncomment for file I/O:
// freopen("problem.in", "r", stdin);
// freopen("problem.out", "w", stdout);
// ── Your solution here ──
return 0;
}
C.8 Common Patterns and Idioms
// ─── Reading N integers into a vector ────────────────────────────────────────
int n; cin >> n;
vi a(n);
for (int &x : a) cin >> x;
// ─── 2D vector initialization ────────────────────────────────────────────────
int R, C;
vector<vector<int>> grid(R, vector<int>(C, 0));
// ─── Sorting with custom criterion ───────────────────────────────────────────
sort(all(v), [](const auto &a, const auto &b) {
return a.weight < b.weight; // sort by weight ascending
});
// ─── Finding min/max with index ───────────────────────────────────────────────
auto maxIt = max_element(all(v));
int maxVal = *maxIt;
int maxIdx = maxIt - v.begin();
// ─── Erase duplicates from sorted vector ─────────────────────────────────────
sort(all(v));
v.erase(unique(all(v)), v.end());
// ─── String splitting by character ───────────────────────────────────────────
vector<string> split(const string &s, char delim) {
vector<string> parts;
stringstream ss(s);
string part;
while (getline(ss, part, delim)) parts.pb(part);
return parts;
}
// ─── Integer square root (exact, no float issues) ───────────────────────────
ll isqrt(ll n) {
ll r = sqrtl(n);
while (r*r > n) r--;
while ((r+1)*(r+1) <= n) r++;
return r;
}
// ─── Checking if a number is prime ───────────────────────────────────────────
bool isPrime(ll n) {
if (n < 2) return false;
if (n == 2) return true;
if (n % 2 == 0) return false;
for (ll i = 3; i * i <= n; i += 2) {
if (n % i == 0) return false;
}
return true;
}
// ─── Sieve of Eratosthenes (all primes up to N) ─────────────────────────────
vector<bool> sieve(int N) {
vector<bool> is_prime(N+1, true);
is_prime[0] = is_prime[1] = false;
for (int i = 2; i * i <= N; i++) {
if (is_prime[i]) {
for (int j = i*i; j <= N; j += i)
is_prime[j] = false;
}
}
return is_prime;
}
C.9 Debugging Tips
// Use cerr for debug output (judges usually ignore stderr)
#ifdef DEBUG
#define dbg(x) cerr << #x << " = " << x << "\n"
#define dbgv(v) cerr << #v << ": "; for(auto x:v) cerr << x << " "; cerr << "\n"
#else
#define dbg(x)
#define dbgv(v)
#endif
// Compile with: g++ -DDEBUG -o sol sol.cpp (enables debug output)
// Compile without: g++ -o sol sol.cpp (removes debug output)
// Usage:
int x = 42;
dbg(x); // prints: x = 42 (only in debug mode)
vi v = {1,2,3};
dbgv(v); // prints: v: 1 2 3 (only in debug mode)
// Compile with sanitizers to catch memory errors and UB:
// g++ -fsanitize=address,undefined -O1 -o sol sol.cpp
// These are invaluable for catching:
// - Out-of-bounds array access
// - Integer overflow (with -fsanitize=signed-integer-overflow)
// - Use of uninitialized memory
// - Null pointer dereference
Fenwick Tree (BIT) — Prefix Sum with Updates
The Binary Indexed Tree (BIT or Fenwick Tree) uses the lowest set bit trick to achieve O(log N) prefix sum queries and updates. Each index i is "responsible" for the range [i - lowbit(i) + 1, i] where lowbit(i) = i & (-i).
// Fenwick Tree / BIT — O(log N) update and prefix query
struct BIT {
int n;
vector<long long> tree;
BIT(int n) : n(n), tree(n + 1, 0) {}
// Add val to position i (1-indexed)
void update(int i, long long val) {
for (; i <= n; i += i & (-i))
tree[i] += val;
}
// Prefix sum [1..i]
long long query(int i) {
long long sum = 0;
for (; i > 0; i -= i & (-i))
sum += tree[i];
return sum;
}
// Range sum [l..r]
long long query(int l, int r) { return query(r) - query(l - 1); }
};