Appendix C: C++ Competitive Programming Tricks

This appendix collects the most useful C++ tricks, macros, templates, and code snippets that competitive programmers use daily. These techniques can save significant time in contests and help your code run faster.

C.1 Fast I/O

The most important performance optimization for I/O-heavy problems:

// Always include these at the start of main()
ios_base::sync_with_stdio(false);  // disconnect C and C++ I/O streams
cin.tie(NULL);                      // untie cin from cout

// Why they help:
// sync_with_stdio(false): by default, C++ syncs with C I/O (printf/scanf)
//   for compatibility. Turning this off makes cin/cout much faster.
// cin.tie(NULL): by default, cin flushes cout before each read.
//   Untying eliminates this unnecessary flush.

File I/O (USACO traditional problems):

freopen("problem.in",  "r", stdin);   // redirect cin to file (replace "problem" with actual name)
freopen("problem.out", "w", stdout);  // redirect cout to file
// After these lines, cin/cout work as normal but read/write files
// Example: for "Blocked Billboard", use "billboard.in" / "billboard.out"

Even faster: manual reading with getchar_unlocked (Linux):

inline int readInt() {
    int x = 0; bool neg = false;
    char c = getchar_unlocked();
    while (c != '-' && (c < '0' || c > '9')) c = getchar_unlocked();
    if (c == '-') { neg = true; c = getchar_unlocked(); }
    while (c >= '0' && c <= '9') { x = x*10 + c-'0'; c = getchar_unlocked(); }
    return neg ? -x : x;
}
// Typically 3-5× faster than cin for large integer inputs

C.2 Common Macros and Typedefs

// Shorter type names
typedef long long ll;
typedef unsigned long long ull;
typedef long double ld;
typedef pair<int,int> pii;
typedef pair<ll,ll> pll;
typedef vector<int> vi;
typedef vector<ll> vll;
typedef vector<pii> vpii;

// Shorthand operations
#define pb push_back
#define pf push_front
#define all(v) (v).begin(), (v).end()
#define rall(v) (v).rbegin(), (v).rend()
#define sz(v) ((int)(v).size())
#define fi first
#define se second

// Loop macros (use sparingly — can hurt readability)
#define FOR(i, a, b) for(int i = (a); i < (b); i++)
#define REP(i, n) FOR(i, 0, n)

// Min/max shortcuts
#define chmin(a, b) a = min(a, b)
#define chmax(a, b) a = max(a, b)

// Usage examples:
// vi v; v.pb(5);        → v.push_back(5)
// sort(all(v));         → sort(v.begin(), v.end())
// cout << sz(v) << "\n";→ cout << (int)v.size() << "\n"
// FOR(i, 1, n+1) { ... }→ for(int i = 1; i < n+1; i++) { ... }

C.3 GCC Pragmas for Speed

// These pragmas can give 2-4× speedup on GCC compilers (used on USACO judges)
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2,bmi,bmi2,popcnt")

// Place these BEFORE #include lines
// Warning: "O3" and "avx2" may cause subtle numerical differences
//   (usually fine for integer problems, be careful with floating point)

// Safer version (just O2 without vector instructions):
#pragma GCC optimize("O2")

// Full competitive template with pragmas:
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2")
#include <bits/stdc++.h>
using namespace std;
// ... rest of your code

C.4 Useful Math: GCD, LCM, Modular Arithmetic

#include <bits/stdc++.h>
using namespace std;

// ─── GCD and LCM ───────────────────────────────────────────────────────────

// C++17: std::gcd and std::lcm from <numeric>
#include <numeric>
int g = gcd(12, 8);            // 4
int l = lcm(4, 6);             // 12

// C++14 and earlier: __gcd from <algorithm>
int g2 = __gcd(12, 8);         // 4
long long l2 = 4LL / __gcd(4, 6) * 6;  // 12 (careful: divide first to avoid overflow)

// Custom GCD (Euclidean algorithm):
ll mygcd(ll a, ll b) { return b ? mygcd(b, a%b) : a; }
ll mylcm(ll a, ll b) { return a / mygcd(a,b) * b; }  // divide first!

// ─── Modular Arithmetic ─────────────────────────────────────────────────────

const ll MOD = 1e9 + 7;  // standard USACO/Codeforces modulus

// Add: (a + b) % MOD
ll addmod(ll a, ll b) { return (a + b) % MOD; }

// Subtract: (a - b + MOD) % MOD  ← always add MOD before % to avoid negatives
ll submod(ll a, ll b) { return (a - b + MOD) % MOD; }

// Multiply: (a * b) % MOD
ll mulmod(ll a, ll b) { return (a % MOD) * (b % MOD) % MOD; }

// Power: a^b mod MOD using fast exponentiation — O(log b)
ll power(ll base, ll exp, ll mod = MOD) {
    ll result = 1;
    base %= mod;
    while (exp > 0) {
        if (exp & 1) result = result * base % mod;  // odd exponent
        base = base * base % mod;                    // square
        exp >>= 1;                                   // halve exponent
    }
    return result;
}

// Modular inverse (a^{-1} mod p, where p is prime):
ll modinv(ll a, ll mod = MOD) { return power(a, mod-2, mod); }
// This uses Fermat's little theorem: a^{p-1} ≡ 1 (mod p) for prime p
// So a^{-1} ≡ a^{p-2} (mod p)

// Modular division: (a / b) mod p = (a * b^{-1}) mod p
ll divmod(ll a, ll b) { return mulmod(a, modinv(b)); }

// Example: C(n, k) mod p using precomputed factorials
const int MAXN = 200001;
ll fact[MAXN], inv_fact[MAXN];

void precompute_factorials() {
    fact[0] = 1;
    for (int i = 1; i < MAXN; i++) fact[i] = fact[i-1] * i % MOD;
    inv_fact[MAXN-1] = modinv(fact[MAXN-1]);
    for (int i = MAXN-2; i >= 0; i--) inv_fact[i] = inv_fact[i+1] * (i+1) % MOD;
}

ll C(int n, int k) {
    if (k < 0 || k > n) return 0;
    return fact[n] * inv_fact[k] % MOD * inv_fact[n-k] % MOD;
}

C.5 Useful Code Snippets

Disjoint Set Union (DSU / Union-Find) Template

// DSU — complete template with size tracking
struct DSU {
    vector<int> parent, sz;

    DSU(int n) : parent(n+1), sz(n+1, 1) {
        iota(parent.begin(), parent.end(), 0);  // parent[i] = i
    }

    int find(int x) {
        if (parent[x] != x) parent[x] = find(parent[x]);  // path compression
        return parent[x];
    }

    bool unite(int x, int y) {
        x = find(x); y = find(y);
        if (x == y) return false;              // already same component
        if (sz[x] < sz[y]) swap(x, y);        // union by size
        parent[y] = x;
        sz[x] += sz[y];
        return true;                            // successfully merged
    }

    bool connected(int x, int y) { return find(x) == find(y); }
    int size(int x) { return sz[find(x)]; }     // size of x's component
};

// Usage:
DSU dsu(n);
dsu.unite(1, 2);
cout << dsu.connected(1, 3) << "\n";   // 0 (false)
cout << dsu.size(1) << "\n";           // 2

Segment Tree (Point Update, Range Query)

// Segment Tree — supports:
//   point_update(i, val): set position i to val
//   query(l, r): sum of [l, r]
// All operations O(log N)

struct SegTree {
    int n;
    vector<ll> tree;

    SegTree(int n) : n(n), tree(4*n, 0) {}

    void update(int node, int start, int end, int idx, ll val) {
        if (start == end) {
            tree[node] = val;
            return;
        }
        int mid = (start + end) / 2;
        if (idx <= mid) update(2*node, start, mid, idx, val);
        else            update(2*node+1, mid+1, end, idx, val);
        tree[node] = tree[2*node] + tree[2*node+1];  // merge
    }

    ll query(int node, int start, int end, int l, int r) {
        if (r < start || end < l) return 0;           // out of range
        if (l <= start && end <= r) return tree[node]; // fully in range
        int mid = (start + end) / 2;
        return query(2*node, start, mid, l, r)
             + query(2*node+1, mid+1, end, l, r);
    }

    void update(int i, ll val) { update(1, 1, n, i, val); }
    ll query(int l, int r) { return query(1, 1, n, l, r); }
};

// Usage:
SegTree st(n);
st.update(3, 10);           // set position 3 to 10
cout << st.query(1, 5);     // sum of positions 1..5

BFS Template

// Grid BFS — shortest path in unweighted grid
int bfs_grid(vector<string>& grid, int sr, int sc, int er, int ec) {
    int R = grid.size(), C = grid[0].size();
    vector<vector<int>> dist(R, vector<int>(C, -1));
    queue<pair<int,int>> q;
    int dr[] = {-1, 1, 0, 0};
    int dc[] = {0, 0, -1, 1};

    dist[sr][sc] = 0;
    q.push({sr, sc});

    while (!q.empty()) {
        auto [r, c] = q.front(); q.pop();
        for (int d = 0; d < 4; d++) {
            int nr = r + dr[d], nc = c + dc[d];
            if (nr >= 0 && nr < R && nc >= 0 && nc < C
                && grid[nr][nc] != '#' && dist[nr][nc] == -1) {
                dist[nr][nc] = dist[r][c] + 1;
                q.push({nr, nc});
            }
        }
    }
    return dist[er][ec];
}

Binary Search on Answer Template

// Binary search on answer — maximize X such that check(X) is true
// Precondition: check is monotone (false...false...true...true)
template<typename T, typename F>
T binary_search_ans(T lo, T hi, F check) {
    T ans = lo;  // or -1 if no valid answer
    while (lo <= hi) {
        T mid = lo + (hi - lo) / 2;
        if (check(mid)) { ans = mid; lo = mid + 1; }
        else { hi = mid - 1; }
    }
    return ans;
}

// Usage example: find max D such that canPlace(D) is true
int result = binary_search_ans(1, maxDist, canPlace);

C.6 Built-in Functions Worth Knowing

// ─── Integer operations ─────────────────────────────────────────────────────

__builtin_popcount(x)      // count set bits in x (int)
__builtin_popcountll(x)    // count set bits in x (long long)
__builtin_clz(x)           // count leading zeros (int, x > 0)
__builtin_ctz(x)           // count trailing zeros (int, x > 0)

// Examples:
__builtin_popcount(0b1011) == 3       // three 1-bits
__builtin_ctz(0b1000)      == 3       // three trailing zeros
__builtin_clz(1)           == 31      // 31 leading zeros (for 32-bit int)
(31 - __builtin_clz(x))              // floor(log2(x))

// ─── Bit tricks ─────────────────────────────────────────────────────────────

// Check if x is a power of 2:
bool isPow2 = (x > 0) && !(x & (x-1));

// Extract lowest set bit:
int lsb = x & (-x);

// Turn off lowest set bit:
x = x & (x-1);

// Iterate all subsets of a bitmask (for bitmask DP):
for (int sub = mask; sub > 0; sub = (sub-1) & mask) {
    // process subset 'sub' of 'mask'
}

// ─── Useful STL functions ────────────────────────────────────────────────────

// next_permutation: iterate all permutations
sort(v.begin(), v.end());    // start from sorted order
do {
    // v is current permutation
} while (next_permutation(v.begin(), v.end()));

// __gcd: greatest common divisor (available before C++17)
int g = __gcd(a, b);

// std::gcd, std::lcm (C++17 <numeric>):
#include <numeric>
int g = gcd(a, b);
int l = lcm(a, b);

C.7 The Full Competition Template

// ────────────────────────────────────────────────────────────────────────────
// Competitive Programming Template — C++17
// ────────────────────────────────────────────────────────────────────────────
#pragma GCC optimize("O2")
#include <bits/stdc++.h>
using namespace std;

// Type aliases
typedef long long ll;
typedef pair<int,int> pii;
typedef vector<int> vi;

// Convenience macros
#define pb push_back
#define all(v) (v).begin(), (v).end()
#define sz(v) ((int)(v).size())
#define fi first
#define se second

// Constants
const ll MOD = 1e9 + 7;
const ll INF = 1e18;
const int MAXN = 200005;

// Fast power mod
ll power(ll base, ll exp, ll mod = MOD) {
    ll res = 1; base %= mod;
    for (; exp > 0; exp >>= 1) {
        if (exp & 1) res = res * base % mod;
        base = base * base % mod;
    }
    return res;
}

int main() {
    ios_base::sync_with_stdio(false);
    cin.tie(NULL);

    // Uncomment for file I/O:
    // freopen("problem.in", "r", stdin);
    // freopen("problem.out", "w", stdout);

    // ── Your solution here ──

    return 0;
}

C.8 Common Patterns and Idioms

// ─── Reading N integers into a vector ────────────────────────────────────────
int n; cin >> n;
vi a(n);
for (int &x : a) cin >> x;

// ─── 2D vector initialization ────────────────────────────────────────────────
int R, C;
vector<vector<int>> grid(R, vector<int>(C, 0));

// ─── Sorting with custom criterion ───────────────────────────────────────────
sort(all(v), [](const auto &a, const auto &b) {
    return a.weight < b.weight;  // sort by weight ascending
});

// ─── Finding min/max with index ───────────────────────────────────────────────
auto maxIt = max_element(all(v));
int maxVal = *maxIt;
int maxIdx = maxIt - v.begin();

// ─── Erase duplicates from sorted vector ─────────────────────────────────────
sort(all(v));
v.erase(unique(all(v)), v.end());

// ─── String splitting by character ───────────────────────────────────────────
vector<string> split(const string &s, char delim) {
    vector<string> parts;
    stringstream ss(s);
    string part;
    while (getline(ss, part, delim)) parts.pb(part);
    return parts;
}

// ─── Integer square root (exact, no float issues) ───────────────────────────
ll isqrt(ll n) {
    ll r = sqrtl(n);
    while (r*r > n) r--;
    while ((r+1)*(r+1) <= n) r++;
    return r;
}

// ─── Checking if a number is prime ───────────────────────────────────────────
bool isPrime(ll n) {
    if (n < 2) return false;
    if (n == 2) return true;
    if (n % 2 == 0) return false;
    for (ll i = 3; i * i <= n; i += 2) {
        if (n % i == 0) return false;
    }
    return true;
}

// ─── Sieve of Eratosthenes (all primes up to N) ─────────────────────────────
vector<bool> sieve(int N) {
    vector<bool> is_prime(N+1, true);
    is_prime[0] = is_prime[1] = false;
    for (int i = 2; i * i <= N; i++) {
        if (is_prime[i]) {
            for (int j = i*i; j <= N; j += i)
                is_prime[j] = false;
        }
    }
    return is_prime;
}

C.9 Debugging Tips

// Use cerr for debug output (judges usually ignore stderr)
#ifdef DEBUG
    #define dbg(x) cerr << #x << " = " << x << "\n"
    #define dbgv(v) cerr << #v << ": "; for(auto x:v) cerr << x << " "; cerr << "\n"
#else
    #define dbg(x)
    #define dbgv(v)
#endif
// Compile with: g++ -DDEBUG -o sol sol.cpp  (enables debug output)
// Compile without: g++ -o sol sol.cpp  (removes debug output)

// Usage:
int x = 42;
dbg(x);         // prints: x = 42  (only in debug mode)
vi v = {1,2,3};
dbgv(v);        // prints: v: 1 2 3  (only in debug mode)

// Compile with sanitizers to catch memory errors and UB:
// g++ -fsanitize=address,undefined -O1 -o sol sol.cpp
// These are invaluable for catching:
//   - Out-of-bounds array access
//   - Integer overflow (with -fsanitize=signed-integer-overflow)
//   - Use of uninitialized memory
//   - Null pointer dereference

Fenwick Tree (BIT) — Prefix Sum with Updates

Binary Indexed Tree

The Binary Indexed Tree (BIT or Fenwick Tree) uses the lowest set bit trick to achieve O(log N) prefix sum queries and updates. Each index i is "responsible" for the range [i - lowbit(i) + 1, i] where lowbit(i) = i & (-i).

// Fenwick Tree / BIT — O(log N) update and prefix query
struct BIT {
    int n;
    vector<long long> tree;
    BIT(int n) : n(n), tree(n + 1, 0) {}

    // Add val to position i (1-indexed)
    void update(int i, long long val) {
        for (; i <= n; i += i & (-i))
            tree[i] += val;
    }

    // Prefix sum [1..i]
    long long query(int i) {
        long long sum = 0;
        for (; i > 0; i -= i & (-i))
            sum += tree[i];
        return sum;
    }

    // Range sum [l..r]
    long long query(int l, int r) { return query(r) - query(l - 1); }
};

C++ for Competitive Programming: A USACO Guide