Externalization
Externalization is Java's mechanism for giving a class complete, explicit control over its serialized form. A class that implements java.io.Externalizable takes full responsibility for reading and writing its own state — the JVM provides no default field serialization. Externalizable declares two methods: writeExternal(ObjectOutput out) writes the object's state using the provided ObjectOutput; readExternal(ObjectInput in) reads it back. Unlike Serializable, which uses the JVM's reflection-based automatic field serialization, Externalizable gives developers explicit control over what is written, the byte-level format, the order of fields, and the encoding of each value. Externalizable classes must have a public no-argument constructor, which is called by the deserialization mechanism before readExternal() is invoked. This constructor-calling behavior is a key difference from Serializable's constructor-bypass: Externalizable deserialization does call a constructor, though it may be a no-op constructor. This entry covers the Externalizable contract in full, the two-method API and ObjectOutput/ObjectInput interfaces, the public no-arg constructor requirement, performance characteristics versus Serializable, the identity-preservation mechanism for shared references, version evolution challenges, and when Externalizable is the right choice.
The Externalizable Contract — writeExternal and readExternal
// ── Basic Externalizable implementation ──────────────────────────────
import java.io.*;
/**
 * A two-dimensional integer point whose serialized payload is written by hand.
 *
 * <p>The payload is exactly two big-endian ints: {@code x} followed by {@code y}.
 */
public class Point implements Externalizable {
    // Externalizable classes still carry a serialVersionUID in their class
    // descriptor, so it is pinned explicitly here.
    private static final long serialVersionUID = 1L;

    private int x;
    private int y;

    /**
     * Public no-arg constructor required by the Externalizable contract.
     * Deserialization invokes it first and then calls {@link #readExternal}.
     */
    public Point() {
    }

    /**
     * Creates a point at the given coordinates.
     *
     * @param x horizontal coordinate
     * @param y vertical coordinate
     */
    public Point(int x, int y) {
        this.x = x;
        this.y = y;
    }

    /**
     * Writes the payload: {@code x} then {@code y}, 4 bytes each (8 bytes in total,
     * not counting the stream's own class descriptor).
     *
     * @param out destination supplied by the serialization mechanism
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void writeExternal(ObjectOutput out) throws IOException {
        out.writeInt(this.x);
        out.writeInt(this.y);
    }

    /**
     * Restores the payload in exactly the order {@link #writeExternal} produced it.
     *
     * @param in source supplied by the serialization mechanism
     * @throws IOException if the underlying stream fails
     * @throws ClassNotFoundException declared by the interface; not thrown here
     */
    @Override
    public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
        // The no-arg constructor has already run by the time this is called.
        this.x = in.readInt();
        this.y = in.readInt();
    }

    /** Returns the point rendered as {@code Point(x, y)}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Point(");
        text.append(x).append(", ").append(y).append(")");
        return text.toString();
    }
}
// ── Serialization round-trip ──────────────────────────────────────────
Point original = new Point(10, 20);
// Serialize into an in-memory buffer:
byte[] bytes;
try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
     ObjectOutputStream oos = new ObjectOutputStream(baos)) {
    oos.writeObject(original);
    // ObjectOutputStream buffers internally; flush explicitly so the snapshot
    // below does not depend on when the stream happens to drain its buffer.
    oos.flush();
    bytes = baos.toByteArray();
}
System.out.println("Serialized size: " + bytes.length + " bytes");
// Deserialize:
// 1. The stream reads the class descriptor (class name, serialVersionUID, flags —
//    no field descriptors for an Externalizable class)
// 2. The public no-arg constructor Point() is invoked  ← constructor IS called
// 3. readExternal(in) is called on the new instance
try (ObjectInputStream ois = new ObjectInputStream(
        new ByteArrayInputStream(bytes))) {
    Point restored = (Point) ois.readObject();
    System.out.println("Restored: " + restored); // Point(10, 20)
}
// ── Missing public no-arg constructor: runtime failure ────────────────
/**
 * Deliberately broken example: an Externalizable class that declares only a
 * private one-argument constructor and therefore has no public no-arg constructor.
 */
public class BadExternalizable implements Externalizable {
    private int value;

    // The only constructor, and it is private — intentionally so for this demonstration.
    private BadExternalizable(int initialValue) {
        this.value = initialValue;
    }

    /**
     * Writes the single int field.
     *
     * @param out destination supplied by the serialization mechanism
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void writeExternal(ObjectOutput out) throws IOException {
        out.writeInt(this.value);
    }

    /**
     * Reads the single int field back.
     *
     * @param in source supplied by the serialization mechanism
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readExternal(ObjectInput in) throws IOException {
        this.value = in.readInt();
    }
}
try (ObjectInputStream ois = new ObjectInputStream(...)) {
BadExternalizable b = (BadExternalizable) ois.readObject();
// Throws: java.io.InvalidClassException: BadExternalizable; no valid constructor
// (the class has no public no-arg constructor)
}
ObjectOutput/ObjectInput, Shared References, and Version Evolution
// ── writeObject inside writeExternal: nested serialization ───────────
/**
 * An order whose serialized payload is written by hand: the order id, the
 * customer id, the number of lines, then each line via nested
 * {@code writeObject}.
 *
 * <p>{@code customerId} and {@code lines} are never null. The only constructor
 * is the no-arg one, so they default to empty values; otherwise
 * {@link #writeExternal} would throw {@link NullPointerException} for a
 * freshly constructed instance.
 */
public class Order implements Externalizable {
    // Upper bound on the list capacity reserved up front from a stream-supplied
    // count; the list still grows to the real size as elements are read.
    private static final int MAX_PREALLOCATED_LINES = 1024;

    private long orderId;
    private String customerId = "";
    // OrderLine is expected to be Serializable so that writeObject can handle it.
    private List<OrderLine> lines = new ArrayList<>();

    /** Public no-arg constructor required by the Externalizable contract. */
    public Order() {
    }

    /**
     * Writes the order id, customer id, line count, and then each line.
     *
     * @param out destination supplied by the serialization mechanism
     * @throws IOException if the underlying stream fails or a line cannot be written
     */
    @Override
    public void writeExternal(ObjectOutput out) throws IOException {
        out.writeLong(orderId);
        out.writeUTF(customerId);
        out.writeInt(lines.size());
        for (OrderLine line : lines) {
            out.writeObject(line); // each OrderLine is serialized via its own mechanism
        }
    }

    /**
     * Reads the fields back in exactly the order {@link #writeExternal} wrote them.
     * State is assigned only after the whole payload has been read successfully.
     *
     * @param in source supplied by the serialization mechanism
     * @throws InvalidObjectException if the stream declares a negative line count
     * @throws IOException if the underlying stream fails
     * @throws ClassNotFoundException if the class of a nested line cannot be found
     */
    @Override
    public void readExternal(ObjectInput in)
            throws IOException, ClassNotFoundException {
        long restoredId = in.readLong();
        String restoredCustomer = in.readUTF();
        int count = in.readInt();
        if (count < 0) {
            throw new InvalidObjectException("Negative line count: " + count);
        }
        // Do not trust the declared count for the initial allocation.
        List<OrderLine> restoredLines =
                new ArrayList<>(Math.min(count, MAX_PREALLOCATED_LINES));
        for (int i = 0; i < count; i++) {
            restoredLines.add((OrderLine) in.readObject()); // cast required
        }
        this.orderId = restoredId;
        this.customerId = restoredCustomer;
        this.lines = restoredLines;
    }
}
// ── Shared reference preservation ────────────────────────────────────
// Even with Externalizable, the JVM tracks object identity at the writeObject level:
Order o1 = new Order();
Order o2 = new Order();
// Write o1 twice and o2 once into the same stream. The first write of each
// object emits it in full and records a handle for it; the repeated write of
// o1 is emitted as a back-reference to that handle rather than a second copy.
try (ObjectOutputStream oos = new ObjectOutputStream(new ByteArrayOutputStream())) {
    for (Order next : new Order[] {o1, o1, o2}) {
        oos.writeObject(next);
    }
}
// On deserialization, the first two readObject() calls return the SAME Order instance
// ── Version evolution with explicit version number ─────────────────────
/**
 * A point whose hand-written payload starts with an explicit format version so
 * that older streams remain readable.
 *
 * <p>Version 1 payload: {@code x}, {@code y}. Version 2 adds {@code z}.
 */
public class VersionedPoint implements Externalizable {
    private static final int CURRENT_VERSION = 2;

    private int x, y;
    private double z; // added in version 2

    /** Public no-arg constructor required by the Externalizable contract. */
    public VersionedPoint() {
    }

    /**
     * Writes the current format: version, {@code x}, {@code y}, {@code z}.
     *
     * @param out destination supplied by the serialization mechanism
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void writeExternal(ObjectOutput out) throws IOException {
        out.writeInt(CURRENT_VERSION); // always write version first
        out.writeInt(x);
        out.writeInt(y);
        out.writeDouble(z); // version 2 addition
    }

    /**
     * Reads a version 1 or version 2 payload.
     *
     * <p>The version is validated before any payload is interpreted, and fields
     * are assigned only after the whole payload has been read, so a rejected
     * stream leaves this object untouched.
     *
     * @param in source supplied by the serialization mechanism
     * @throws IOException if the version is not in {@code 1..CURRENT_VERSION}
     *         or the underlying stream fails
     * @throws ClassNotFoundException declared by the interface; not thrown here
     */
    @Override
    public void readExternal(ObjectInput in)
            throws IOException, ClassNotFoundException {
        int version = in.readInt(); // read version first
        if (version < 1 || version > CURRENT_VERSION) {
            // The layout of an unknown version is unknown, so reject it before reading further.
            throw new IOException("Unknown version: " + version);
        }
        int restoredX = in.readInt();
        int restoredY = in.readInt();
        // Version 1 streams never wrote z; fall back to the default.
        double restoredZ = (version >= 2) ? in.readDouble() : 0.0;

        this.x = restoredX;
        this.y = restoredY;
        this.z = restoredZ;
    }
}
// ── Externalizable vs Serializable: when to use each ─────────────────
// USE Serializable when:
// - Simplicity is the priority
// - The class has few fields and the default format is acceptable
// - Version evolution via field addition/deletion is the main concern
// - Security via writeReplace/readResolve/serialization proxy is needed
// USE Externalizable when:
// - Full control over the byte format is required (e.g., interoperability with non-Java)
// - Maximum performance and minimum stream size are critical
// - Custom encoding (variable-length integers, packed bytes) is needed
// - The class has non-serializable fields that require custom logic anyway
// ── Performance comparison ────────────────────────────────────────────
// Serializable overhead per instance (first class occurrence):
// Class descriptor: ~60-100 bytes (class name, serialVersionUID, field count, field descriptors)
// Instance data: actual field values
// Externalizable per instance:
// Class descriptor: ~30-50 bytes (class name, serialVersionUID, flags — no field descriptors)
// Instance data: exactly what writeExternal writes (developer-controlled)
// For a class with 3 int fields (12 bytes of actual data):
// Serializable first instance: ~80 + 12 = 92 bytes total
// Externalizable: ~40 + 12 = 52 bytes total (about 43% smaller)
// For subsequent instances in same stream, class descriptor is referenced not repeated:
// Serializable: ~5 + 12 = 17 bytes (back-reference to class descriptor)
// Externalizable: ~5 + 12 = 17 bytes (same — back-reference)
// So for large arrays: similar size; the difference is in the first instance only
Externalizable vs Serializable — Security and Design Trade-offs
// ── Security: constructor called before readExternal ─────────────────
/**
 * Externalizable example that validates its state on both construction and
 * deserialization, and tracks whether it was ever properly initialized.
 *
 * <p>An instance is "initialized" only after the validating constructor or a
 * successful {@link #readExternal} call. An uninitialized instance can neither
 * be read through {@link #getValue()} nor be serialized.
 */
public class SecureExternalizable implements Externalizable {
    private int value;
    private String data;
    private boolean initialized = false;

    /**
     * Public no-arg constructor required by the Externalizable contract.
     * It runs BEFORE {@link #readExternal}, so it cannot validate stream data;
     * it only puts the object into a recognizable "not yet initialized" state.
     */
    public SecureExternalizable() {
        this.value = Integer.MIN_VALUE; // sentinel
        System.out.println("Constructor called (may be from untrusted source)");
    }

    /**
     * Creates a fully initialized instance.
     *
     * @param value non-negative value
     * @param data non-null payload
     * @throws IllegalArgumentException if {@code value} is negative
     * @throws NullPointerException if {@code data} is null
     */
    public SecureExternalizable(int value, String data) {
        if (value < 0) throw new IllegalArgumentException("value must be non-negative");
        if (data == null) throw new NullPointerException("data cannot be null");
        this.value = value;
        this.data = data;
        this.initialized = true;
    }

    /**
     * Writes {@code value} then {@code data}.
     *
     * @param out destination supplied by the serialization mechanism
     * @throws IllegalStateException if this instance was never initialized;
     *         checked before anything is written so no partial record is emitted
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void writeExternal(ObjectOutput out) throws IOException {
        // Without this guard an uninitialized instance would write the sentinel
        // int and then fail with a NullPointerException inside writeUTF(null).
        if (!initialized) throw new IllegalStateException("Not properly initialized");
        out.writeInt(value);
        out.writeUTF(data);
    }

    /**
     * Reads and validates the payload; fields are assigned only if validation passes.
     *
     * @param in source supplied by the serialization mechanism
     * @throws IOException if the stream fails or carries invalid values
     * @throws ClassNotFoundException declared by the interface; not thrown here
     */
    @Override
    public void readExternal(ObjectInput in)
            throws IOException, ClassNotFoundException {
        int v = in.readInt();
        String d = in.readUTF();
        // Validate deserialized values — the same rules the validating constructor enforces:
        if (v < 0) throw new IOException("Invalid value: " + v);
        if (d == null) throw new IOException("data cannot be null");
        this.value = v;
        this.data = d;
        this.initialized = true;
    }

    /**
     * Returns the validated value.
     *
     * @return the value
     * @throws IllegalStateException if initialization never completed
     */
    public int getValue() {
        if (!initialized) throw new IllegalStateException("Not properly initialized");
        return value;
    }
}
// ── Hybrid: Serializable + writeObject/readObject (usually better) ────
// Instead of Externalizable, use Serializable with custom writeObject/readObject
// for most cases:
/**
 * Serializable class that keeps default field serialization and appends extra
 * custom data through private {@code writeObject}/{@code readObject} hooks.
 *
 * <p>Stream layout: the default fields ({@code rawData}, {@code name}), then an
 * int length, then that many bytes of compressed data.
 *
 * <p>NOTE(review): {@code rawData} is written both by the default mechanism and
 * again in compressed form; if a compact encoding is the goal, confirm whether
 * {@code rawData} should be {@code transient}.
 */
public class HybridCustom implements Serializable {
    private static final long serialVersionUID = 1L;

    private int[] rawData; // large array — want compact encoding
    private String name; // normal field
    private transient int[] decompressed; // derived from the compressed bytes on read

    /**
     * Writes the default fields followed by the length-prefixed compressed bytes.
     *
     * @param oos stream supplied by the serialization mechanism
     * @throws IOException if the underlying stream fails
     */
    private void writeObject(ObjectOutputStream oos) throws IOException {
        oos.defaultWriteObject(); // writes rawData, name normally
        // Add extra compressed version for large data:
        byte[] compressed = compress(rawData);
        oos.writeInt(compressed.length);
        oos.write(compressed);
    }

    /**
     * Restores the default fields, then reads the length-prefixed compressed
     * bytes and rebuilds {@code decompressed} from them.
     *
     * @param ois stream supplied by the serialization mechanism
     * @throws InvalidObjectException if the stream declares a negative length
     * @throws IOException if the underlying stream fails or ends early
     * @throws ClassNotFoundException if a class of a default field cannot be found
     */
    private void readObject(ObjectInputStream ois)
            throws IOException, ClassNotFoundException {
        ois.defaultReadObject(); // restores rawData, name
        int len = ois.readInt();
        // The length comes from the stream; reject a negative value as a stream
        // error instead of failing with NegativeArraySizeException.
        if (len < 0) {
            throw new InvalidObjectException("Negative compressed length: " + len);
        }
        byte[] compressed = new byte[len];
        ois.readFully(compressed);
        this.decompressed = decompress(compressed);
    }

    private byte[] compress(int[] data) { return new byte[0]; } // placeholder
    private int[] decompress(byte[] b) { return new int[0]; } // placeholder
}
// ── Summary: choosing between Externalizable and Serializable ─────────
//
// Serializable Externalizable
// ──────────────────────────────────────────────────────────────────────
// Control over format Low (automatic) Complete
// Version evolution Easy (field matching) Manual (version numbers)
// Constructor behavior Bypassed Called (public no-arg required)
// Security hooks writeReplace/Resolve readExternal validation
// Maintenance burden Low High
// Performance Good (JVM-optimized) Better (hand-tuned possible)
// Best for Most cases Performance-critical, format-specific