Garbage Collection
Garbage collection is the automatic process by which the JVM reclaims memory occupied by objects that are no longer reachable from the running program. It is one of Java's most defining features — eliminating the manual memory management required in C and C++ and the entire class of bugs that comes with it: use-after-free, double-free, and memory leaks caused by forgotten deallocation. Understanding garbage collection means understanding reachability, GC roots, the collection process, what the GC guarantees and what it does not, how to work with it rather than against it, and how to diagnose and resolve GC-related performance problems. This entry covers the reachability model, GC triggers, what GC does not collect, finalization, the GC performance trade-off triangle, and practical guidance for writing GC-friendly code.
Reachability — What Gets Collected
// ── Reachability examples ────────────────────────────────────────────
/**
 * Walks through five small scenarios showing when an object stops being
 * reachable from a GC root and therefore becomes eligible for collection.
 */
public class ReachabilityDemo {

    // A static field acts as a GC root: anything reachable through it stays live.
    static List<byte[]> staticCache = new ArrayList<>();

    public static void main(String[] args) {
        // Case 1: a local variable is a strong reference.
        // The slot for 'payload' in main's stack frame is the root that keeps the array alive.
        byte[] payload = new byte[1024];

        // Case 2: overwriting the only reference orphans the old object.
        byte[] first = new byte[1024];  // Object A, reachable through 'first'
        byte[] second = new byte[1024]; // Object B, reachable through 'second'
        first = second;                 // nothing points at Object A any more -> garbage
        // Both 'first' and 'second' now refer to Object B.

        // Case 3: assigning null drops the reference.
        byte[] scratch = new byte[1024]; // reachable
        scratch = null;                  // unreachable -> eligible for GC

        // Case 4: a reference cycle does not keep its members alive.
        Node head = new Node();
        Node tail = new Node();
        tail.next = head;
        head.next = tail; // the two nodes now point at each other
        head = null;
        tail = null;
        // Neither node can be reached from a root any more. A tracing collector
        // reclaims both even though they still reference each other; pure
        // reference counting would not.

        // Case 5: a collection keeps its elements alive.
        List<byte[]> holder = new ArrayList<>();
        holder.add(new byte[1024]); // the list holds the only reference to this array
        // While 'holder' is reachable and still contains the array, the array is live.
        holder.clear();             // last strong reference removed -> eligible for GC
    }
}
// ── GC root categories ────────────────────────────────────────────────
// 1. Thread stacks — all local vars in all active frames of all threads
// 2. Static fields — every static field of every loaded class
// 3. JNI global references — references held by native code
// 4. Active monitors — objects locked by synchronized()
// 5. String pool — interned strings
// 6. ClassLoader objects — and transitively, all loaded classes
// 7. System classes — java.lang.Class objects for primitive types, arrays
What GC Guarantees — and What It Does Not
// ── GC timing is not guaranteed ──────────────────────────────────────
// WRONG: relying on GC to close a file (unreliable)
/**
 * ANTI-PATTERN, kept deliberately: opens a file and never closes it.
 *
 * The stream is intentionally left unclosed to illustrate the problem; see
 * the try-with-resources form for the correct approach.
 *
 * @param path location of the file to open
 * @throws IOException if the file cannot be opened (the constructor throws
 *         the checked FileNotFoundException, so it must be declared)
 */
public void processFile(String path) throws IOException {
    FileInputStream fis = new FileInputStream(path);
    // ... process ...
    // 'fis' goes out of scope when the method returns, but the underlying file
    // handle is NOT released at that point. It stays open until the stream
    // object is eventually garbage-collected and any cleanup hook runs —
    // and there is no guarantee about when (or whether) that happens.
}
// CORRECT: explicit close with try-with-resources (guaranteed)
/**
 * Opens the file inside a try-with-resources statement so the stream is
 * closed deterministically, independent of garbage collection.
 *
 * @param path location of the file to open
 * @throws IOException if the file cannot be opened or closing it fails
 */
public void processFile_correct(String path) throws IOException {
    try (FileInputStream input = new FileInputStream(path)) {
        // ... process ...
    }
    // input.close() has already run by the time control reaches this point,
    // whether the block finished normally or threw.
}
// ── System.gc() — do not call in application code ────────────────────
// WRONG: calling System.gc() to "free memory"
/**
 * ANTI-PATTERN, kept deliberately: requests a collection after a large job
 * in the hope of "freeing memory".
 */
public void doWork() {
    processLargeData();

    // WRONG: System.gc() is only a hint. The JVM may ignore it, or may respond
    // with an unnecessary full-GC pause at an unpredictable moment.
    System.gc();
}
// ── Explicit null assignment — usually unnecessary ────────────────────
/**
 * Shows a null assignment that buys nothing: the local is not used again
 * and the reference disappears when the method returns.
 */
public void method() {
    byte[] largeArray = new byte[10_000_000];
    process(largeArray);

    // UNNECESSARY: the reference is dropped when the method returns anyway,
    // so clearing it by hand only adds noise.
    largeArray = null;

    doMoreWork(); // this frame no longer references the array at this point
}
// The rare legitimate case — long method, large object used early:
/**
 * Shows the rare case where clearing a local by hand is worthwhile: a large
 * object is needed only at the start of a method that then keeps running
 * for a long time.
 *
 * If the thread is interrupted while waiting, the interrupt flag is restored
 * and the method returns without performing the remaining work.
 */
public void longRunningMethod() {
    byte[] largeArray = new byte[10_000_000];
    process(largeArray);

    // LEGITIMATE: the array is eligible for GC from here on. Without this
    // assignment the local slot may keep the 10 MB array reachable for the
    // rest of this long-lived frame.
    largeArray = null;

    // ... hours of other work below ...
    try {
        Thread.sleep(10_000);
    } catch (InterruptedException e) {
        // Thread.sleep throws a checked exception; preserve the interrupt
        // status for callers instead of swallowing it, then stop early.
        Thread.currentThread().interrupt();
        return;
    }
    moreWork();
}
// ── GC does NOT collect objects with strong references ────────────────
public class LeakyCache {
// Objects put here are NEVER collected — strong reference in static field
static Map<String, byte[]> cache = new HashMap<>();
/**
 * Stores {@code data} under {@code key} in the static cache map.
 * Nothing in this class ever removes entries, so the array stays strongly
 * referenced by the map after this call.
 */
public static void addToCache(String key, byte[] data) {
    // The map is held by a static field, so this put keeps 'data' alive indefinitely.
    cache.put(key, data);
}
// This will eventually cause OutOfMemoryError
}
GC Triggers and Collection Types
// ── GC log analysis — understanding what the JVM reports ─────────────
// Enable GC logging (Java 9+):
// java -Xlog:gc*:file=gc.log:time,uptime,level,tags MyApp
//
// Sample output lines and their meanings:
//
// [2.345s][info][gc] GC(12) Pause Young (Normal) (G1 Evacuation Pause)
// 42M->18M(256M) 8.234ms
// ↑ time ↑ type ↑before↑after↑heap ↑ STW pause duration
// Normal young GC: 24MB collected in 8ms — healthy
//
// [45.678s][info][gc] GC(67) Pause Full (G1 Compaction Pause)
// 198M->45M(256M) 423.456ms
// FULL GC: 423ms pause — investigate! Should not happen regularly
//
// [0.123s][info][gc,heap] Eden regions: 12->0(25) ← Eden emptied
// Survivor regions: 3->2(3) ← Some objects survived
// Old regions: 8->9(40) ← Some promoted to Old
// ── Monitoring GC programmatically ───────────────────────────────────
import java.lang.management.*;
// Ask the platform for one MXBean per collector and print its running totals.
List<GarbageCollectorMXBean> gcBeans = ManagementFactory.getGarbageCollectorMXBeans();
for (GarbageCollectorMXBean collector : gcBeans) {
    String collectorName = collector.getName();
    long collections = collector.getCollectionCount(); // collections performed so far
    long elapsedMillis = collector.getCollectionTime(); // accumulated collection time, ms
    System.out.printf("GC: %-25s count=%-6d time=%dms%n",
            collectorName, collections, elapsedMillis);
}
// Output might show:
// GC: G1 Young Generation count=1234 time=4567ms
// GC: G1 Old Generation count=2 time=890ms
// ── Allocation rate measurement ───────────────────────────────────────
// High allocation rate → frequent young GC → CPU overhead
// Measure with: jstat -gcnew <pid> 1000 (every 1 second)
//
// jstat output:
// S0C S1C S0U S1U TT MTT DSS EC EU YGC YGCT
// 0.0 3072.0 0.0 512.0 3 15 2048.0 60416.0 42048.0 892 4.234
// EU (Eden Used) growing fast = high allocation rate
// YGC (Young GC count) high = frequent collections
// ── GC trade-off: throughput vs latency vs footprint ─────────────────
//
// Parallel GC (-XX:+UseParallelGC):
// Throughput: BEST (multiple threads, less overhead)
// Latency: POOR (long STW pauses, hundreds of ms)
// Footprint: LOW
// Use case: Batch jobs, offline processing
//
// G1GC (-XX:+UseG1GC, default Java 9+):
// Throughput: GOOD
// Latency: GOOD (targets <200ms via MaxGCPauseMillis)
// Footprint: MEDIUM
// Use case: Most web applications
//
// ZGC (-XX:+UseZGC, Java 15+):
// Throughput: GOOD (modest overhead for concurrent work)
// Latency: BEST (<1ms pauses even at terabyte scale)
// Footprint: HIGHER (colored pointers overhead)
// Use case: Latency-sensitive services, large heaps
Writing GC-Friendly Code
// ── Reducing allocation in hot paths ─────────────────────────────────
// ALLOCATING: builds a Stream pipeline, may allocate a lambda, and wraps the result in an Optional (the list elements are already boxed Integers)
/**
 * Returns the first element of {@code list} that is strictly greater than
 * {@code threshold}, or an empty Optional when no element qualifies.
 *
 * @param list      values to scan, in encounter order
 * @param threshold exclusive lower bound
 * @return the first qualifying element, if any
 */
public Optional<Integer> findFirst(List<Integer> list, int threshold) {
    return list
            .stream()
            // the lambda captures 'threshold'; the JIT may optimise the allocation away
            .filter(value -> value > threshold)
            // the result is wrapped in an Optional
            .findFirst();
}
// ALLOCATION-REDUCED: no boxing, no iterator, no Optional
/**
 * Returns the first element of {@code data} that is strictly greater than
 * {@code threshold}, working on primitives only.
 *
 * @param data      values to scan from index 0 upwards
 * @param threshold exclusive lower bound
 * @return the first qualifying value, or -1 when none qualifies; note that
 *         -1 is also a possible match when {@code threshold} is below -1
 */
public int findFirstRaw(int[] data, int threshold) {
    for (int value : data) {
        if (value > threshold) {
            return value;
        }
    }
    // Sentinel stands in for "not found" so no Optional has to be created.
    return -1;
}
// ── String building ───────────────────────────────────────────────────
// HIGH allocation: + in loop creates N-1 intermediate Strings
// Deliberately inefficient: Strings are immutable, so each '+=' below builds
// a brand-new String and rebinds 'result' to it; the previous value becomes
// garbage straight away. Every item is followed by ", ", including the last.
String result = "";
for (String item : items) {
    result += item + ", "; // one throw-away String per iteration — N in total
}
// LOW allocation: one StringBuilder, one final String
// Pre-size the buffer with a rough 16-characters-per-item estimate, then
// grow a single builder in place instead of creating a String per item.
StringBuilder sb = new StringBuilder(items.size() * 16);
for (String entry : items) {
    sb.append(entry);
    sb.append(", ");
}
// The only String created is the final result.
String result2 = sb.toString();
// ── Reuse vs recreate ─────────────────────────────────────────────────
// HIGH allocation: new StringBuilder per call
/**
 * Formats {@code amount} as a dollar string, e.g. {@code $12.50}.
 * A fresh StringBuilder is created on every invocation.
 *
 * @param amount value to render after the dollar sign
 * @return "$" followed by the textual form of {@code amount}
 */
public String formatAmount(BigDecimal amount) {
    StringBuilder text = new StringBuilder(); // allocated anew for each call
    text.append("$");
    text.append(amount);
    return text.toString();
}
// REUSE via ThreadLocal:
// One builder per thread, created lazily with room for 64 characters.
private static final ThreadLocal<StringBuilder> SB =
        ThreadLocal.withInitial(() -> new StringBuilder(64));

/**
 * Formats {@code amount} as a dollar string using the calling thread's
 * cached StringBuilder instead of allocating a new one.
 *
 * @param amount value to render after the dollar sign
 * @return "$" followed by the textual form of {@code amount}
 */
public String formatAmountReuse(BigDecimal amount) {
    StringBuilder buffer = SB.get();
    buffer.setLength(0); // discard the previous call's content, keep the backing array
    buffer.append("$");
    buffer.append(amount);
    return buffer.toString();
}
// ── Prefer primitives to avoid autoboxing allocations ─────────────────
// HIGH allocation: each int boxed to Integer
// Values in this map are Integer objects, so every update goes through boxing.
Map<String, Integer> counters = new HashMap<>();
counters.merge("key", 1, (current, increment) -> current + increment);

// LOW allocation: keep the count in a primitive slot. For keyed counters,
// primitive-valued maps (Eclipse Collections, etc.) serve the same purpose.
int[] counter = new int[1];
counter[0] += 1; // plain int arithmetic, nothing allocated
// ── Object pooling — only for expensive objects ────────────────────────
// Appropriate: connection pools (socket + auth overhead)
// Appropriate: compiled Pattern objects (regex compilation overhead)
// NOT appropriate: ordinary POJOs (nanosecond creation, no benefit)
private static final Pattern PHONE_PATTERN =
Pattern.compile("^\+?[\d\s\-()]{10,}$"); // compile once, reuse
public boolean isValidPhone(String phone) {
return PHONE_PATTERN.matcher(phone).matches(); // matcher is cheap
}