DataInputStream
DataInputStream wraps any InputStream and adds methods for reading Java primitive types in a machine-independent binary format. It reads boolean, byte, short, int, long, float, double, and char values from the underlying stream using fixed byte widths and big-endian byte order. The big-endian, fixed-width encoding is identical to the format written by DataOutputStream, making the two classes the natural pair for serializing and deserializing primitive data across files, network connections, or inter-process pipes. DataInputStream also provides readFully(), which blocks until exactly the specified number of bytes have been read — filling a buffer completely rather than returning a partial read as InputStream.read() may do — and throws EOFException if the stream ends first. readUTF() reads a string encoded in a modified UTF-8 format (a two-byte length prefix followed by the encoded string bytes) that was written by DataOutputStream.writeUTF(). DataInputStream does no buffering of its own, so for performance it should wrap a BufferedInputStream, as in new DataInputStream(new BufferedInputStream(in)), rather than a raw file or socket stream. This entry covers the full method API, the big-endian byte order contract, readFully() vs read() semantics, the modified UTF-8 format and its limitations, end-of-file detection, and composition patterns for binary protocol parsing.
Construction, Read Methods, and Big-Endian Contract
// ── Construction: always wrap with BufferedInputStream ────────────────
try (DataInputStream dis = new DataInputStream(
new BufferedInputStream(new FileInputStream("data.bin")))) {
// reads from file with 8192-byte buffer
}
// ── Reading all primitive types ───────────────────────────────────────
try (DataInputStream dis = new DataInputStream(
new BufferedInputStream(new FileInputStream("primitives.bin")))) {
boolean flag = dis.readBoolean(); // 1 byte: 0=false, nonzero=true
byte b = dis.readByte(); // 1 byte signed: -128 to 127
int ub = dis.readUnsignedByte(); // 1 byte as int: 0 to 255
short s = dis.readShort(); // 2 bytes big-endian signed
int us = dis.readUnsignedShort(); // 2 bytes as int: 0 to 65535
char c = dis.readChar(); // 2 bytes big-endian UTF-16 char
int i = dis.readInt(); // 4 bytes big-endian signed
long l = dis.readLong(); // 8 bytes big-endian signed
float f = dis.readFloat(); // 4 bytes IEEE 754 big-endian
double d = dis.readDouble(); // 8 bytes IEEE 754 big-endian
System.out.printf("bool=%b byte=%d ubyte=%d short=%d ushort=%d%n", flag, b, ub, s, us);
System.out.printf("char=%c int=%d long=%d float=%f double=%f%n", c, i, l, f, d);
}
// ── Big-endian byte layout visualization ─────────────────────────────
// readInt() reading 0x12345678:
// Stream bytes: 0x78 0x56 0x34 0x12 ← WRONG (little-endian source)
// readInt() sees: 0x78 0x56 0x34 0x12 → 0x78563412 (treating as big-endian)
// Correct big-endian source for 0x12345678:
// 0x12 0x34 0x56 0x78
// ── EOFException on premature stream end ──────────────────────────────
byte[] partialData = {0x00, 0x00}; // only 2 bytes — not enough for readInt() (needs 4)
try (DataInputStream dis = new DataInputStream(
new ByteArrayInputStream(partialData))) {
int value = dis.readInt(); // throws EOFException — stream ended after 2 bytes
} catch (EOFException e) {
System.out.println("Stream ended before readInt could read 4 bytes");
}
// ── Little-endian data: decode with ByteBuffer, not DataInputStream ───
// DataInputStream is big-endian only; ByteBuffer lets the byte order be chosen.
byte[] wireBytes = {0x78, 0x56, 0x34, 0x12}; // 0x12345678, least-significant byte first
ByteBuffer littleEndianView = ByteBuffer.wrap(wireBytes);
littleEndianView.order(ByteOrder.LITTLE_ENDIAN);
int decoded = littleEndianView.getInt(); // 0x12345678
System.out.printf("Little-endian: 0x%08X%n", decoded);
readFully(), readUTF(), and End-of-File Detection
// ── readFully: guaranteed to fill the buffer completely ───────────────
try (DataInputStream dis = new DataInputStream(
new BufferedInputStream(new FileInputStream("messages.bin")))) {
// Each message: 4-byte length header + N bytes body
while (true) {
int length;
try {
length = dis.readInt(); // read 4-byte length — EOFException at clean EOF
} catch (EOFException e) {
System.out.println("No more messages");
break;
}
byte[] body = new byte[length];
dis.readFully(body); // blocks until EXACTLY 'length' bytes are read
// InputStream.read() might return partial data on network streams
processMessage(body);
}
}
// ── read vs readFully: how much of the buffer gets filled ─────────────
byte[] chunk = new byte[1024];
// InputStream.read(chunk) returns as soon as SOME data is available: anywhere from
// 1 to 1024 bytes, or -1 at end of stream. On a network stream that is often a single
// packet's worth (possibly just 512 bytes).
int received = inputStream.read(chunk); // e.g. 512, not necessarily 1024
// DataInputStream.readFully(chunk) keeps reading until all 1024 bytes are filled,
// and throws EOFException if the stream ends first.
dis.readFully(chunk); // on return, chunk is completely filled
// ── readFully(buffer, offset, length): fill a slice of a larger array ─
byte[] scratch = new byte[4096];
int firstLen = 100;
int secondLen = 200;
dis.readFully(scratch, 0, firstLen);         // fills indexes 0..99
dis.readFully(scratch, firstLen, secondLen); // fills indexes 100..299
// ── readUTF: reads DataOutputStream.writeUTF() format ─────────────────
try (DataInputStream dis2 = new DataInputStream(
new BufferedInputStream(new FileInputStream("strings.bin")))) {
String s1 = dis2.readUTF(); // reads 2-byte length prefix, then UTF bytes
String s2 = dis2.readUTF();
System.out.println(s1 + " " + s2);
}
// readUTF() limitations:
// - Only works with DataOutputStream.writeUTF() format (not standard UTF-8 files)
// - Max 65535 bytes of modified UTF-8 per string
// - U+0000 encoded as 2-byte sequence (not standard UTF-8)
// For general UTF-8 strings from files/network:
// read a length prefix yourself, readFully() into a byte[], then decode as standard UTF-8.
final int maxStringBytes = 1 << 20; // sanity cap (1 MiB); tune to the protocol
int strLen = dis.readInt(); // 4-byte big-endian length prefix
// The prefix comes from the data itself: reject impossible or oversized values before
// allocating (a negative length would throw NegativeArraySizeException).
if (strLen < 0 || strLen > maxStringBytes) {
    throw new IOException("Invalid string length: " + strLen);
}
byte[] strBytes = new byte[strLen];
dis.readFully(strBytes); // throws EOFException if fewer than strLen bytes remain
String s = new String(strBytes, StandardCharsets.UTF_8); // standard UTF-8 decode
// ── EOF detection patterns ────────────────────────────────────────────
// At message boundary (clean termination):
try {
while (true) {
int msgType = dis.readInt(); // EOFException here = clean end of stream
int msgLen = dis.readInt();
byte[] payload = new byte[msgLen];
dis.readFully(payload); // EOFException here = truncated stream (error)
dispatch(msgType, payload);
}
} catch (EOFException e) {
// At this point: either clean EOF (after last message) or truncated stream
// Protocol design determines which: check if we're at a message boundary
System.out.println("Stream ended");
}
// ── skipBytes: best-effort skip, made exact with readFully ────────────
int toSkip = 16;
// skipBytes may skip fewer bytes than requested, so work out what is still owed.
int remaining = toSkip - dis.skipBytes(toSkip);
if (remaining > 0) {
    // readFully either consumes the whole remainder or throws EOFException.
    dis.readFully(new byte[remaining]);
}
Binary Protocol Parsing and Composition Patterns
// ── Binary protocol parsing: message framing ─────────────────────────
/**
 * Reads framed binary messages from an input stream.
 *
 * <p>Each frame is {@code [4-byte type][4-byte body length][body]}, with all integers
 * big-endian as read by {@link DataInputStream}.
 */
public class BinaryProtocolParser {
    /** Largest accepted frame body; guards allocation against corrupt or hostile lengths. */
    private static final int MAX_BODY_BYTES = 16 * 1024 * 1024;

    /** Size of the read buffer placed between the raw stream and the parser. */
    private static final int BUFFER_BYTES = 65536;

    private final DataInputStream dis;

    /**
     * Creates a parser over {@code raw}.
     *
     * @param raw the underlying stream; it is wrapped in a {@code BUFFER_BYTES}-byte buffer
     *            so that the many small primitive reads do not each hit the raw stream
     */
    public BinaryProtocolParser(InputStream raw) {
        this.dis = new DataInputStream(new BufferedInputStream(raw, BUFFER_BYTES));
    }

    /**
     * Reads and parses the next frame.
     *
     * @return the parsed message, or {@code null} when the stream ends exactly on a frame
     *         boundary (no bytes of a further frame are available)
     * @throws java.io.EOFException if the stream ends part-way through a frame
     * @throws IOException if the length field is out of range, the message type is unknown,
     *                     or the underlying stream fails
     */
    public Message readMessage() throws IOException {
        // Probe a single byte first: -1 here is the only situation that counts as a clean
        // end of stream. If even one header byte is present, the rest of the frame must
        // follow, so a later EOFException signals truncation rather than normal termination.
        int firstByte = dis.read();
        if (firstByte < 0) {
            return null;
        }
        // Assemble the big-endian type field from the probed byte plus the next three.
        // Operands of | are evaluated left to right, so the bytes are consumed in order.
        int msgType = (firstByte << 24)
                | (dis.readUnsignedByte() << 16)
                | (dis.readUnsignedByte() << 8)
                | dis.readUnsignedByte();

        int msgLen = dis.readInt();
        // The length is read from the stream, so check it before allocating: a negative
        // value would throw NegativeArraySizeException and a huge one could exhaust the heap.
        if (msgLen < 0 || msgLen > MAX_BODY_BYTES) {
            throw new IOException("Invalid message length: " + msgLen);
        }

        byte[] body = new byte[msgLen];
        dis.readFully(body); // blocks until complete; no partial reads

        // The body is parsed from its own byte array, independent of the framing stream.
        return parseBody(msgType, body);
    }

    /**
     * Decodes a frame body according to its message type.
     *
     * @param type the frame's type field
     * @param body exactly the bytes of the frame body
     * @return the decoded message
     * @throws IOException if {@code type} is not recognised, or the body is too short for
     *                     its declared layout (reported as {@code EOFException})
     */
    private Message parseBody(int type, byte[] body) throws IOException {
        // Wrap the body bytes in a DataInputStream for field-by-field parsing.
        try (DataInputStream bodyDis = new DataInputStream(
                new ByteArrayInputStream(body))) {
            return switch (type) {
                case 0x01 -> {
                    // LOGIN: [4-byte user_id][2-byte flags][2-byte name_len][name_len bytes UTF-8 username]
                    int userId = bodyDis.readInt();
                    short flags = bodyDis.readShort();
                    int nameLen = bodyDis.readUnsignedShort();
                    byte[] nameBytes = new byte[nameLen];
                    bodyDis.readFully(nameBytes);
                    String username = new String(nameBytes, StandardCharsets.UTF_8);
                    yield new LoginMessage(userId, flags, username);
                }
                case 0x02 -> {
                    // TRADE: [8-byte timestamp][4-byte symbol_id][8-byte price][4-byte quantity]
                    long timestamp = bodyDis.readLong();
                    int symbolId = bodyDis.readInt();
                    long priceRaw = bodyDis.readLong(); // price * 10000 (fixed-point)
                    int quantity = bodyDis.readInt();
                    // Dividing by 10000.0 converts the fixed-point value to a double.
                    yield new TradeMessage(timestamp, symbolId, priceRaw / 10000.0, quantity);
                }
                default -> throw new IOException("Unknown message type: " + type);
            };
        }
    }
}
// ── PNG signature parsing with DataInputStream ────────────────────────
/**
 * Reports whether a file begins with the 8-byte PNG signature.
 *
 * @param path the file to inspect
 * @return {@code true} if the first 8 bytes are {@code 89 50 4E 47 0D 0A 1A 0A};
 *         {@code false} if they differ or the file is shorter than 8 bytes
 * @throws IOException if the file cannot be opened or read
 */
public static boolean isPng(Path path) throws IOException {
    // PNG signature: 0x89 'P' 'N' 'G' CR LF 0x1A LF
    final byte[] pngSignature = {(byte) 0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A};
    byte[] sig = new byte[pngSignature.length];
    try (DataInputStream dis = new DataInputStream(
            new BufferedInputStream(new FileInputStream(path.toFile())))) {
        try {
            dis.readFully(sig);
        } catch (EOFException e) {
            return false; // file too small to hold the signature
        }
    }
    // Compare all 8 bytes: the trailing CR LF 0x1A LF is part of the signature too,
    // so matching only "\x89PNG" would accept files that are not PNG.
    return java.util.Arrays.equals(sig, pngSignature);
}
// ── NIO ByteBuffer alternative for high performance ───────────────────
// Read a binary message via an NIO channel and parse it with ByteBuffer.
final int maxBodyBytes = 16 * 1024 * 1024; // sanity cap (16 MiB); tune to the protocol
// try-with-resources closes the channel even if a read or parse step throws.
try (SocketChannel channel = SocketChannel.open(new InetSocketAddress("host", 8080))) {
    ByteBuffer headerBuf = ByteBuffer.allocateDirect(8); // 4-byte type + 4-byte length
    headerBuf.order(ByteOrder.BIG_ENDIAN); // match DataOutputStream's byte order

    // Read exactly 8 bytes. channel.read returns -1 once the peer has closed the
    // connection; without this check the loop would spin forever on a closed channel.
    while (headerBuf.hasRemaining()) {
        if (channel.read(headerBuf) < 0) {
            throw new EOFException("Connection closed while reading message header");
        }
    }
    headerBuf.flip();
    int msgType = headerBuf.getInt(); // reads 4 bytes big-endian
    int msgLen = headerBuf.getInt();  // reads 4 bytes big-endian

    // The length comes from the peer: validate before allocating (allocateDirect rejects
    // negative capacities with IllegalArgumentException, and a huge value wastes memory).
    if (msgLen < 0 || msgLen > maxBodyBytes) {
        throw new IOException("Invalid message length: " + msgLen);
    }
    ByteBuffer bodyBuf = ByteBuffer.allocateDirect(msgLen);
    while (bodyBuf.hasRemaining()) {
        if (channel.read(bodyBuf) < 0) {
            throw new EOFException("Connection closed while reading message body");
        }
    }
    bodyBuf.flip();

    // Parse body fields (a new ByteBuffer is big-endian by default).
    // These two reads need at least 12 bytes of body; a shorter one throws
    // BufferUnderflowException.
    int userId = bodyBuf.getInt();
    double price = bodyBuf.getDouble();
    // etc.
}