From 5db1cc4c15c5de67f4867d745f2e3fa4928f4600 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 21 Aug 2024 10:31:47 +0300 Subject: [PATCH 01/77] Refactored Offset interface and classes for printing the object in SyslogRecord format. --- .../consumers/kafka/DatabaseOutput.java | 2 +- .../cfe_39/consumers/kafka/NullOffset.java | 14 +- .../cfe_39/consumers/kafka/Offset.java | 8 +- .../cfe_39/consumers/kafka/RecordOffset.java | 124 +++++++++++++++++- .../teragrep/cfe_39/KafkaConsumerTest.java | 66 +++++----- 5 files changed, 177 insertions(+), 37 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java index ae519335..f626bfce 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java @@ -257,7 +257,7 @@ public void accept(List recordOffsetObjectList) { } } - byte[] byteArray = recordOffsetObject.getRecord(); // loads the byte[] contained in recordOffsetObject.getRecord() to byteArray. + byte[] byteArray = recordOffsetObject.record(); // loads the byte[] contained in recordOffsetObject.getRecord() to byteArray. if (byteArray == null) { if (skipEmptyRFC5424Records) { if (LOGGER.isDebugEnabled()) { diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullOffset.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullOffset.java index 08fa3f22..b33b719a 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullOffset.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullOffset.java @@ -45,6 +45,8 @@ */ package com.teragrep.cfe_39.consumers.kafka; +import com.teragrep.cfe_39.avro.SyslogRecord; + // Null object design pattern, used to create null offset objects. 
public final class NullOffset implements Offset { @@ -54,12 +56,22 @@ public boolean isNull() { } @Override - public byte[] getRecord() { + public byte[] record() { return new byte[0]; } + @Override + public long size() { + return 0; + } + @Override public String offsetToJSON() { return "{\"topic\":\"Not available\", \"partition\":0, \"offset\":0}"; } + + @Override + public SyslogRecord toSyslogRecord() { + return SyslogRecord.newBuilder().build(); + } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java index ada4c4fd..a4df4fa1 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java @@ -45,11 +45,17 @@ */ package com.teragrep.cfe_39.consumers.kafka; +import com.teragrep.cfe_39.avro.SyslogRecord; + public interface Offset { boolean isNull(); - byte[] getRecord(); + byte[] record(); + + long size(); String offsetToJSON(); + + SyslogRecord toSyslogRecord(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java index 543a58fd..5aca9ee2 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java @@ -45,6 +45,18 @@ */ package com.teragrep.cfe_39.consumers.kafka; +import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.rlo_06.Fragment; +import com.teragrep.rlo_06.RFC5424Frame; +import com.teragrep.rlo_06.RFC5424Timestamp; +import com.teragrep.rlo_06.SDVector; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.time.Instant; + // This is the class for handling the Kafka record topic/partition/offset data that are required for HDFS storage. 
public final class RecordOffset implements Offset { @@ -66,13 +78,123 @@ public boolean isNull() { } @Override - public byte[] getRecord() { + public byte[] record() { return record; } + @Override + public long size() { + return record.length; + } + @Override public String offsetToJSON() { return String .format("{\"topic\":\"%s\", \"partition\":%d, \"offset\":%d}", this.topic, this.partition, this.offset); } + + @Override + public SyslogRecord toSyslogRecord() { + RFC5424Frame rfc5424Frame = new RFC5424Frame(false); + InputStream inputStream = new ByteArrayInputStream(record); + rfc5424Frame.load(inputStream); + + Instant instant = new RFC5424Timestamp(rfc5424Frame.timestamp).toZonedDateTime().toInstant(); + long MICROS_PER_SECOND = 1000L * 1000L; + long NANOS_PER_MICROS = 1000L; + long sec = Math.multiplyExact(instant.getEpochSecond(), MICROS_PER_SECOND); + long epochMicros = Math.addExact(sec, instant.getNano() / NANOS_PER_MICROS); + + // input + final byte[] source = eventToSource(rfc5424Frame); + + // origin + final byte[] origin = eventToOrigin(rfc5424Frame); + + return SyslogRecord + .newBuilder() + .setTimestamp(epochMicros) + .setPayload(rfc5424Frame.msg.toString()) + .setDirectory(rfc5424Frame.structuredData.getValue(new SDVector("teragrep@48577", "directory")).toString()).setStream(rfc5424Frame.structuredData.getValue(new SDVector("teragrep@48577", "streamname")).toString()).setHost(rfc5424Frame.hostname.toString()).setInput(new String(source, StandardCharsets.UTF_8)).setPartition(String.valueOf(partition)).setOffset(offset).setOrigin(new String(origin, StandardCharsets.UTF_8)).build(); + } + + private byte[] eventToOrigin(RFC5424Frame rfc5424Frame) { + byte[] origin; + Fragment originFragment = rfc5424Frame.structuredData.getValue(new SDVector("origin@48577", "hostname")); + if (!originFragment.isStub) { + origin = originFragment.toBytes(); + } + else { + origin = new byte[] {}; + } + return origin; + } + + private byte[] eventToSource(RFC5424Frame 
rfc5424Frame) { + /*input is produced from SD element event_node_source@48577 by + concatenating "source_module:hostname:source". in case + if event_node_source@48577 is not available use event_node_relay@48577. + If neither are present, use null value.*/ + + ByteBuffer sourceConcatenationBuffer = ByteBuffer.allocateDirect(256 * 1024); + sourceConcatenationBuffer.clear(); + + Fragment sourceModuleFragment = rfc5424Frame.structuredData + .getValue(new SDVector("event_node_source@48577", "source_module")); + if (sourceModuleFragment.isStub) { + sourceModuleFragment = rfc5424Frame.structuredData + .getValue(new SDVector("event_node_relay@48577", "source_module")); + } + + byte[] source_module; + if (!sourceModuleFragment.isStub) { + source_module = sourceModuleFragment.toBytes(); + } + else { + source_module = new byte[] {}; + } + + Fragment sourceHostnameFragment = rfc5424Frame.structuredData + .getValue(new SDVector("event_node_source@48577", "hostname")); + if (sourceHostnameFragment.isStub) { + sourceHostnameFragment = rfc5424Frame.structuredData + .getValue(new SDVector("event_node_relay@48577", "hostname")); + } + + byte[] source_hostname; + if (!sourceHostnameFragment.isStub) { + source_hostname = sourceHostnameFragment.toBytes(); + } + else { + source_hostname = new byte[] {}; + } + + Fragment sourceSourceFragment = rfc5424Frame.structuredData + .getValue(new SDVector("event_node_source@48577", "source")); + if (sourceHostnameFragment.isStub) { + sourceSourceFragment = rfc5424Frame.structuredData + .getValue(new SDVector("event_node_relay@48577", "source")); + } + + byte[] source_source; + if (!sourceSourceFragment.isStub) { + source_source = sourceSourceFragment.toBytes(); + } + else { + source_source = new byte[] {}; + } + + sourceConcatenationBuffer.put(source_module); + sourceConcatenationBuffer.put((byte) ':'); + sourceConcatenationBuffer.put(source_hostname); + sourceConcatenationBuffer.put((byte) ':'); + sourceConcatenationBuffer.put(source_source); + + 
sourceConcatenationBuffer.flip(); + byte[] input = new byte[sourceConcatenationBuffer.remaining()]; + sourceConcatenationBuffer.get(input); + + return input; + } + } diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index 0087786a..af83fff1 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -159,7 +159,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":7, \"offset\":" + i + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ -173,7 +173,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":7, \"offset\":" + 14 + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(0).get(counter); @@ -182,7 +182,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":7, \"offset\":" + 15 + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); ParseException e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -195,7 +195,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":5, \"offset\":" + i + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new 
ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ -209,7 +209,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":5, \"offset\":" + 14 + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(0).get(counter); @@ -218,7 +218,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":5, \"offset\":" + 15 + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -231,7 +231,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":3, \"offset\":" + i + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ -245,7 +245,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":3, \"offset\":" + 14 + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(0).get(counter); @@ -254,7 +254,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":3, \"offset\":" + 15 + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new 
ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -267,7 +267,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":1, \"offset\":" + i + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ -281,7 +281,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":1, \"offset\":" + 14 + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(0).get(counter); @@ -290,7 +290,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":1, \"offset\":" + 15 + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -303,7 +303,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":9, \"offset\":" + i + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ 
-317,7 +317,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":9, \"offset\":" + 14 + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(0).get(counter); @@ -326,7 +326,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":9, \"offset\":" + 15 + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -342,7 +342,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":8, \"offset\":" + i + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ -356,7 +356,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":8, \"offset\":" + 14 + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(1).get(counter); @@ -365,7 +365,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":8, \"offset\":" + 15 + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); 
Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -378,7 +378,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":6, \"offset\":" + i + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ -392,7 +392,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":6, \"offset\":" + 14 + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(1).get(counter); @@ -401,7 +401,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":6, \"offset\":" + 15 + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -414,7 +414,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":4, \"offset\":" + i + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ -428,7 +428,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":4, \"offset\":" + 14 + "}", recordOffset.offsetToJSON() ); - 
Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(1).get(counter); @@ -437,7 +437,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":4, \"offset\":" + 15 + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -450,7 +450,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":2, \"offset\":" + i + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ -464,7 +464,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":2, \"offset\":" + 14 + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(1).get(counter); @@ -473,7 +473,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":2, \"offset\":" + 15 + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -486,7 +486,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":0, 
\"offset\":" + i + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ -500,7 +500,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":" + 14 + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(1).get(counter); @@ -509,7 +509,7 @@ public void readCoordinatorTest2Threads() { "{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":" + 15 + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -612,7 +612,7 @@ public void readCoordinatorTest1Thread() { + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); @@ -627,7 +627,7 @@ public void readCoordinatorTest1Thread() { + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.getRecord()); + Assertions.assertNull(recordOffset.record()); counter++; recordOffset = messages.get(0).get(counter); @@ -637,7 +637,7 @@ public void readCoordinatorTest1Thread() { + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.getRecord())); + rfc5424Frame.load(new 
ByteArrayInputStream(recordOffset.record())); ParseException e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; From 58375bdb97edd9c428da4cd1336edbd4db4b51fd Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 21 Aug 2024 11:45:30 +0300 Subject: [PATCH 02/77] Added missing rfc5424Frame.next() --- .../cfe_39/consumers/kafka/Offset.java | 4 +- .../cfe_39/consumers/kafka/RecordOffset.java | 45 +++++++++---------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java index a4df4fa1..ee0a89a9 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java @@ -47,6 +47,8 @@ import com.teragrep.cfe_39.avro.SyslogRecord; +import java.io.IOException; + public interface Offset { boolean isNull(); @@ -57,5 +59,5 @@ public interface Offset { String offsetToJSON(); - SyslogRecord toSyslogRecord(); + SyslogRecord toSyslogRecord() throws IOException; } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java index 5aca9ee2..3f1280bd 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java @@ -46,12 +46,10 @@ package com.teragrep.cfe_39.consumers.kafka; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.rlo_06.Fragment; -import com.teragrep.rlo_06.RFC5424Frame; -import com.teragrep.rlo_06.RFC5424Timestamp; -import com.teragrep.rlo_06.SDVector; +import com.teragrep.rlo_06.*; import java.io.ByteArrayInputStream; +import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; @@ -94,28 +92,29 @@ 
public String offsetToJSON() { } @Override - public SyslogRecord toSyslogRecord() { + public SyslogRecord toSyslogRecord() throws ParseException, IOException { RFC5424Frame rfc5424Frame = new RFC5424Frame(false); InputStream inputStream = new ByteArrayInputStream(record); rfc5424Frame.load(inputStream); - - Instant instant = new RFC5424Timestamp(rfc5424Frame.timestamp).toZonedDateTime().toInstant(); - long MICROS_PER_SECOND = 1000L * 1000L; - long NANOS_PER_MICROS = 1000L; - long sec = Math.multiplyExact(instant.getEpochSecond(), MICROS_PER_SECOND); - long epochMicros = Math.addExact(sec, instant.getNano() / NANOS_PER_MICROS); - - // input - final byte[] source = eventToSource(rfc5424Frame); - - // origin - final byte[] origin = eventToOrigin(rfc5424Frame); - - return SyslogRecord - .newBuilder() - .setTimestamp(epochMicros) - .setPayload(rfc5424Frame.msg.toString()) - .setDirectory(rfc5424Frame.structuredData.getValue(new SDVector("teragrep@48577", "directory")).toString()).setStream(rfc5424Frame.structuredData.getValue(new SDVector("teragrep@48577", "streamname")).toString()).setHost(rfc5424Frame.hostname.toString()).setInput(new String(source, StandardCharsets.UTF_8)).setPartition(String.valueOf(partition)).setOffset(offset).setOrigin(new String(origin, StandardCharsets.UTF_8)).build(); + if (rfc5424Frame.next()) { + Instant instant = new RFC5424Timestamp(rfc5424Frame.timestamp).toZonedDateTime().toInstant(); + long MICROS_PER_SECOND = 1000L * 1000L; + long NANOS_PER_MICROS = 1000L; + long sec = Math.multiplyExact(instant.getEpochSecond(), MICROS_PER_SECOND); + long epochMicros = Math.addExact(sec, instant.getNano() / NANOS_PER_MICROS); + // input + final byte[] source = eventToSource(rfc5424Frame); + // origin + final byte[] origin = eventToOrigin(rfc5424Frame); + return SyslogRecord + .newBuilder() + .setTimestamp(epochMicros) + .setPayload(rfc5424Frame.msg.toString()) + .setDirectory(rfc5424Frame.structuredData.getValue(new SDVector("teragrep@48577", 
"directory")).toString()).setStream(rfc5424Frame.structuredData.getValue(new SDVector("teragrep@48577", "streamname")).toString()).setHost(rfc5424Frame.hostname.toString()).setInput(new String(source, StandardCharsets.UTF_8)).setPartition(String.valueOf(partition)).setOffset(offset).setOrigin(new String(origin, StandardCharsets.UTF_8)).build(); + } + else { + return SyslogRecord.newBuilder().build(); + } } private byte[] eventToOrigin(RFC5424Frame rfc5424Frame) { From be70879f393c8457388d409dd802713ac113d71d Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 22 Aug 2024 17:31:28 +0300 Subject: [PATCH 03/77] Refactoring DatabaseOutput.java and related classes, WIP. Renamed Offset interface to KafkaRecord. Renamed RecordOffset to KafkaRecordImpl. Renamed NullOffset to NullKafkaRecord. Implemented KafkaRecordConverter for converting kafka records to SyslogRecords. Refactored WriteableQueue to be immutable. --- .../consumers/kafka/DatabaseOutput.java | 410 +++--------------- .../cfe_39/consumers/kafka/HDFSWrite.java | 12 +- .../consumers/kafka/HdfsDataIngestion.java | 2 +- .../cfe_39/consumers/kafka/KafkaReader.java | 10 +- .../kafka/{Offset.java => KafkaRecord.java} | 6 +- ...dOffset.java => KafkaRecordConverter.java} | 151 ++++--- .../consumers/kafka/KafkaRecordImpl.java | 96 ++++ .../{NullOffset.java => NullKafkaRecord.java} | 2 +- .../cfe_39/consumers/kafka/PartitionFile.java | 109 +++++ .../consumers/kafka/ReadCoordinator.java | 6 +- .../consumers/kafka/SyslogAvroWriter.java | 2 +- .../consumers/kafka/queue/WritableQueue.java | 6 +- .../java/com/teragrep/cfe_39/HdfsTest.java | 8 +- .../teragrep/cfe_39/KafkaConsumerTest.java | 14 +- .../cfe_39/ProcessingFailureTest.java | 24 +- .../com/teragrep/cfe_39/ProcessingTest.java | 318 +++++++++++++- 16 files changed, 700 insertions(+), 476 deletions(-) rename src/main/java/com/teragrep/cfe_39/consumers/kafka/{Offset.java => KafkaRecord.java} (95%) rename 
src/main/java/com/teragrep/cfe_39/consumers/kafka/{RecordOffset.java => KafkaRecordConverter.java} (51%) create mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java rename src/main/java/com/teragrep/cfe_39/consumers/kafka/{NullOffset.java => NullKafkaRecord.java} (97%) create mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java index f626bfce..c458f956 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java @@ -47,124 +47,58 @@ import com.google.gson.*; import com.teragrep.cfe_39.Config; -import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.consumers.kafka.queue.WritableQueue; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import com.teragrep.cfe_39.metrics.DurationStatistics; -import com.teragrep.rlo_06.*; +import com.teragrep.rlo_06.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; -import java.nio.charset.StandardCharsets; import java.time.Instant; -import java.time.ZonedDateTime; -import java.util.List; +import java.util.*; import java.util.function.Consumer; -import java.nio.ByteBuffer; - /* The kafka stream should first be deserialized using rlo_06 and then serialized again using avro and stored in HDFS. The target where the record is stored in HDFS is based on the topic, partition and offset. ie. 
topic_name/0.123456 where offset is 123456 The mock consumer is activated for testing using the configuration file: readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")*/ -public class DatabaseOutput implements Consumer> { +public class DatabaseOutput implements Consumer> { private static final Logger LOGGER = LoggerFactory.getLogger(DatabaseOutput.class); - private final RFC5424Frame rfc5424Frame = new RFC5424Frame(false); - - private final String table; - + private final String topic; private final DurationStatistics durationStatistics; private final TopicCounter topicCounter; - - private long lastTimeCalled = Instant.now().toEpochMilli(); - - private SyslogAvroWriter syslogAvroWriter; - private final long maximumFileSize; - private final WritableQueue writableQueue; - private final ByteBuffer sourceConcatenationBuffer; - private final SDVector teragrepStreamName; - private final SDVector teragrepDirectory; - private final SDVector eventNodeSourceSource; - private final SDVector eventNodeRelaySource; - private final SDVector eventNodeSourceSourceModule; - private final SDVector eventNodeRelaySourceModule; - private final SDVector eventNodeSourceHostname; - private final SDVector eventNodeRelayHostname; - private final SDVector originHostname; - private File syslogFile; + private long lastTimeCalled; private final Config config; private final boolean skipNonRFC5424Records; private final boolean skipEmptyRFC5424Records; + private final Map partitionFileMap; + // BatchDistribution? RecordDistribution? 
public DatabaseOutput( Config config, - String table, + String topic, DurationStatistics durationStatistics, TopicCounter topicCounter ) { this.config = config; - this.table = table; + this.topic = topic; this.durationStatistics = durationStatistics; this.topicCounter = topicCounter; - this.maximumFileSize = config.getMaximumFileSize(); - - // queueDirectory and queueNamePrefix are only used for temporarily storing the AVRO-serialized files before committing them to HDFS when the file size reaches the threshold (or all records are processed). - this.writableQueue = new WritableQueue(config.getQueueDirectory(), table); - - this.sourceConcatenationBuffer = ByteBuffer.allocateDirect(256 * 1024); - teragrepStreamName = new SDVector("teragrep@48577", "streamname"); - teragrepDirectory = new SDVector("teragrep@48577", "directory"); - this.eventNodeSourceSource = new SDVector("event_node_source@48577", "source"); - this.eventNodeRelaySource = new SDVector("event_node_relay@48577", "source"); - this.eventNodeSourceSourceModule = new SDVector("event_node_source@48577", "source_module"); - this.eventNodeRelaySourceModule = new SDVector("event_node_relay@48577", "source_module"); - this.eventNodeSourceHostname = new SDVector("event_node_source@48577", "hostname"); - this.eventNodeRelayHostname = new SDVector("event_node_relay@48577", "hostname"); - this.originHostname = new SDVector("origin@48577", "hostname"); this.skipNonRFC5424Records = config.getSkipNonRFC5424Records(); this.skipEmptyRFC5424Records = config.getSkipEmptyRFC5424Records(); - } - - // Checks that the filesize stays under the defined maximum file size. If the file is about to go over target limit commits the file to HDFS and returns true, otherwise does nothing and returns false. - private boolean writeToHdfs(long fileSize, JsonObject recordOffsetObjectJo) { - try { - // If the syslogAvroWriter is already initialized, check the filesize so it doesn't go above maximumFileSize. 
- if (fileSize > maximumFileSize) { - // file too large for adding the new record to it, write the still adequately sized AVRO-file to the HDFS database and create a new empty AVRO-file. - - // This part closes the writing of now "complete" AVRO-file and stores the file to HDFS. - syslogAvroWriter.close(); - try (HDFSWrite writer = new HDFSWrite(config, recordOffsetObjectJo)) { - writer.commit(syslogFile); // commits the final AVRO-file to HDFS. - } - return true; - } - } - catch (IOException ioException) { - throw new UncheckedIOException(ioException); - } - return false; - } + this.partitionFileMap = new HashMap<>(); - private long rfc3339ToEpoch(ZonedDateTime zonedDateTime) { - final Instant instant = zonedDateTime.toInstant(); - - final long MICROS_PER_SECOND = 1000L * 1000L; - final long NANOS_PER_MICROS = 1000L; - final long sec = Math.multiplyExact(instant.getEpochSecond(), MICROS_PER_SECOND); - - return Math.addExact(sec, instant.getNano() / NANOS_PER_MICROS); + this.lastTimeCalled = Instant.now().toEpochMilli(); } - /* Input parameter is a list of RecordOffsetObjects. Each object contains a record and its metadata (topic, partition and offset). + /* Input parameter is a batch of RecordOffsetObjects from kafka. Each object contains a record and its metadata (topic, partition and offset). Each partition will get their set of exclusive AVRO-files in HDFS. The target where the record is stored in HDFS is based on the topic, partition and last offset. ie. topic_name/0.123456 where last written record's offset is 123456. AVRO-file with a path/name that starts with topic_name/0.X should only contain records from the 0th partition of topic named topic_name, topic_name/1.X should only contain records from 1st partition, etc. AVRO-files are created dynamically, thus it is not known which record (and its offset) is written to the file last before committing it to HDFS. 
The final name for the HDFS file is decided only when the file is committed to HDFS.*/ @Override - public void accept(List recordOffsetObjectList) { + public void accept(List recordOffsetObjectList) { long thisTime = Instant.now().toEpochMilli(); long ftook = thisTime - lastTimeCalled; topicCounter.setKafkaLatency(ftook); @@ -172,320 +106,104 @@ public void accept(List recordOffsetObjectList) { LOGGER .debug( "Fuura searching your batch for <[{}]> with records <{}> and took <{}> milliseconds. <{}> EPS. ", - table, recordOffsetObjectList.size(), (ftook), + topic, recordOffsetObjectList.size(), (ftook), (recordOffsetObjectList.size() * 1000L / ftook) ); } long batchBytes = 0L; /* The recordOffsetObjectList loop will go through all the objects in the list. - While it goes through the list, the contents of the objects are serialized into an AVRO-file. + The objects can serialize their contents into SyslogRecords that can be stored to an AVRO-file. When the file size is about to go above 64M, commit the file into HDFS using the latest topic/partition/offset values as the filename and start fresh with a new empty AVRO-file. - Serialize the object that was going to make the file go above 64M into the now empty AVRO-file and continue the loop. - TODO: If the prod-environment recordOffsetObjectList ordering is different from what it is in the test environment, add a function that reorders the list based on partition and offset (or better yet, make several AVRO-files that are being used at the same time rather than doing it one AVRO-file at a time as the offset ordering within partitions should always be correct in all scenarios).*/ - Offset lastObject = new NullOffset(); // Set to null object before initializing as RecordOffsetObject. - JsonObject lastObjectJo = JsonParser.parseString(lastObject.offsetToJSON()).getAsJsonObject(); + Serialize the object that was going to make the file go above 64M into the now empty AVRO-file. 
+ .*/ long start = Instant.now().toEpochMilli(); // Starts measuring performance here. Measures how long it takes to process the whole recordOffsetObjectList. - // This loop goes through all the records of the mock data in a single session. - for (RecordOffset recordOffsetObject : recordOffsetObjectList) { - JsonObject recordOffsetObjectJo = JsonParser - .parseString(recordOffsetObject.offsetToJSON()) - .getAsJsonObject(); - // Initializing syslogAvroWriter and lastObject. - if (syslogAvroWriter == null && lastObject.isNull()) { + ListIterator recordOffsetListIterator = recordOffsetObjectList.listIterator(); + while (recordOffsetListIterator.hasNext()) { + // process recordOffsetObjectList here, the consumer only consumes 500 records in a single batch so the file can't be committed during a single accept(). + // Distribute the records to a PartitionFile object based on partition from which the record originates from. + + // load the next KafkaRecord + KafkaRecordImpl next = recordOffsetListIterator.next(); + // Read the topic, partition and offset information of the record + JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); + String topic = recordOffset.get("topic").getAsString(); + String partition = recordOffset.get("partition").getAsString(); + // Pass the record to the PartitionFile object that it belongs to. If the correct PartitionFile doesn't exist, create one. + if (!partitionFileMap.containsKey(partition)) { try { - writableQueue - .setQueueNamePrefix(recordOffsetObjectJo.get("topic").getAsString() + recordOffsetObjectJo.get("partition").getAsString()); - syslogFile = writableQueue.getNextWritableFile(); - // The HDFS filename is only finalized when the AVRO-serialized file is finalized, because every Kafka-record added to the file is going to change the offset that is going to be used for the filename. 
- syslogAvroWriter = new SyslogAvroWriter(syslogFile); - lastObject = recordOffsetObject; - lastObjectJo = JsonParser.parseString(lastObject.offsetToJSON()).getAsJsonObject(); + partitionFileMap.put(partition, new PartitionFile(config, topic, partition)); } - catch (IOException ioException) { - throw new IllegalArgumentException(ioException); + catch (IOException e) { + throw new RuntimeException(e); } } - else { - try { - if ( - lastObjectJo.get("topic").getAsString().equals(recordOffsetObjectJo.get("topic").getAsString()) - && lastObjectJo.get("partition").getAsString().equals(recordOffsetObjectJo.get("partition").getAsString()) - ) { - // Records left to consume in the current partition. - boolean fileCommitted = writeToHdfs(syslogAvroWriter.getFileSize(), lastObjectJo); - if (fileCommitted) { - // This part defines a new empty file to which the new AVRO-serialized records are stored until it again hits the size limit defined in config. - writableQueue - .setQueueNamePrefix(recordOffsetObjectJo.get("topic").getAsString() + recordOffsetObjectJo.get("partition").getAsString()); - syslogFile = writableQueue.getNextWritableFile(); - syslogAvroWriter = new SyslogAvroWriter(syslogFile); - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug( - "Target file size reached, file <{}> stored to <{}> in HDFS", syslogFile - .getName(), - lastObjectJo.get("topic").getAsString() + "/" + lastObjectJo.get("partition").getAsString() + "." + lastObjectJo.get("offset").getAsString() - ); - } - } - else { - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug( - "Target file size not yet reached, continuing writing records to <{}>.", - syslogFile.getName() - ); - } - } - } - else { - // Previous partition was fully consumed. Commit file to HDFS and create a new AVRO-file. 
- syslogAvroWriter.close(); - HDFSWrite writer = new HDFSWrite(config, lastObjectJo); - writer.commit(syslogFile); - - // This part defines a new empty file to which the new AVRO-serialized records are stored until it again hits the 64M size limit. - writableQueue - .setQueueNamePrefix(recordOffsetObjectJo.get("topic").getAsString() + recordOffsetObjectJo.get("partition").getAsString()); - syslogFile = writableQueue.getNextWritableFile(); - syslogAvroWriter = new SyslogAvroWriter(syslogFile); - } - } - catch (IOException ioException) { - throw new UncheckedIOException(ioException); - } + // Every PartitionFile object will hold responsibility over a single unique file that is related to a single topic partition. + PartitionFile recordPartitionFile = partitionFileMap.get(partition); + // Tell PartitionFile to add the current record to the list of records that are going to be added to the file. Handle skipping of broken records. + try { + recordPartitionFile.addRecord(next.toSyslogRecord()); } - - byte[] byteArray = recordOffsetObject.record(); // loads the byte[] contained in recordOffsetObject.getRecord() to byteArray. - if (byteArray == null) { - if (skipEmptyRFC5424Records) { - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug( - "Skipping processing an empty non RFC5424 record. Record metadata: {}", - recordOffsetObject.offsetToJSON() - ); - } - continue; + catch (ParseException e) { + if (skipNonRFC5424Records) { + LOGGER + .warn( + "Skipping parsing a non RFC5424 record, record metadata: <{}>. 
Exception information: ", + recordOffset, e + ); } else { - if (LOGGER.isDebugEnabled()) { - LOGGER.debug("Null record metadata: {}", recordOffsetObject.offsetToJSON()); - } - syslogFile.delete(); // Clean up - throw new NullPointerException("Record with null content detected during processing."); - } - - } - InputStream inputStream = new ByteArrayInputStream(byteArray); - rfc5424Frame.load(inputStream); - try { - if (rfc5424Frame.next()) { - /*rfc5424Frame has loaded the record data, it's ready for deserialization. - Implement AVRO serialization for the Kafka records here, preparing the data for writing to HDFS. - Write all the data into a file using AVRO. - The size of each AVRO-serialized file should be as close to 64M as possible.*/ - - batchBytes = batchBytes + byteArray.length; - - // input - final byte[] source = eventToSource(); - - // origin - final byte[] origin = eventToOrigin(); - - // Format: Use AVRO format with syslog columns as indexed ones - final long epochMicros = rfc3339ToEpoch( - new RFC5424Timestamp(rfc5424Frame.timestamp).toZonedDateTime() - ); - SyslogRecord syslogRecord = SyslogRecord - .newBuilder() - .setTimestamp(epochMicros) - .setPayload(rfc5424Frame.msg.toString()) - .setDirectory(rfc5424Frame.structuredData.getValue(teragrepDirectory).toString()) - .setStream(rfc5424Frame.structuredData.getValue(teragrepStreamName).toString()) // Or is sourcetype/stream supposed to be rfc5424Frame.appName.toString() instead? - .setHost(rfc5424Frame.hostname.toString()) - .setInput(new String(source, StandardCharsets.UTF_8)) - .setPartition(recordOffsetObjectJo.get("partition").getAsString()) - .setOffset(recordOffsetObjectJo.get("offset").getAsLong()) - .setOrigin(new String(origin, StandardCharsets.UTF_8)) - .build(); - - // Calculate the size of syslogRecord that is going to be written to syslogAvroWriter-file. - long capacity = syslogRecord.toByteBuffer().capacity(); - // Check if there is still room in syslogAvroWriter for another syslogRecord. 
Commit syslogAvroWriter to HDFS if no room left, emptying it out in the process. - boolean fileCommitted = writeToHdfs(syslogAvroWriter.getFileSize() + capacity, lastObjectJo); - if (fileCommitted) { - // This part defines a new empty file to which the new AVRO-serialized records are stored until it again hits the size limit defined in config. - writableQueue - .setQueueNamePrefix(recordOffsetObjectJo.get("topic").getAsString() + recordOffsetObjectJo.get("partition").getAsString()); - syslogFile = writableQueue.getNextWritableFile(); - syslogAvroWriter = new SyslogAvroWriter(syslogFile); - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug( - "Target file size reached, file <{}> stored to <{}/{}.{}> in HDFS", - syslogFile.getName(), lastObjectJo.get("topic").getAsString(), lastObjectJo.get("partition").getAsString(), lastObjectJo.get("offset").getAsString() - ); - } - } - else { - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug( - "Target file size not yet reached, continuing writing records to <{}>.", - syslogFile.getName() - ); - } - } - // Add syslogRecord to syslogAvroWriter which has room for new syslogRecord. - syslogAvroWriter.write(syslogRecord); - lastObject = recordOffsetObject; - lastObjectJo = JsonParser.parseString(lastObject.offsetToJSON()).getAsJsonObject(); + throw new RuntimeException(e); } } - catch (IOException e) { - throw new UncheckedIOException(e); - } - catch (ParseException e) { - if (skipNonRFC5424Records) { - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug( - "Skipping processing a non RFC5424 record, record metadata: {}. Exception information: ", - recordOffsetObject.offsetToJSON(), e - ); - } - continue; + catch (NullPointerException e) { + if (skipEmptyRFC5424Records) { + LOGGER + .warn( + "Skipping parsing an empty RFC5424 record, record metadata: <{}>. 
Exception information: ", + recordOffset, e + ); } else { - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug( - "Record metadata that is causing ParseException: {}.", - recordOffsetObject.offsetToJSON() - ); - } - syslogFile.delete(); // Clean up throw new RuntimeException(e); } } } - // Handle the "leftover" syslogRecords from the loop. - try { - if (syslogAvroWriter != null && !lastObject.isNull()) { - syslogAvroWriter.close(); - try (HDFSWrite writer = new HDFSWrite(config, lastObjectJo)) { - writer.commit(syslogFile); // commits the final AVRO-file to HDFS. - } + // When all records in the current batch have been distributed to different PartitionFile objects successfully, commit the adding of records to the files for all PartitionFile objects. + partitionFileMap.forEach((key, value) -> { + try { + value.commitRecords(); } - } - catch (IOException e) { - throw new UncheckedIOException(e); - } + catch (IOException e) { + LOGGER.error("Failed to write the SyslogRecords to PartitionFile <{}> in topic <{}>", key, topic); + // FIXME: Delete the files that were stored to HDFS before the exception hit, to make sure data integrity is preserved during consumer rebalance as kafka consumer will not mark the failed record batch as committed. + throw new RuntimeException(e); + } + }); // Measures performance of code that is between start and end. 
long end = Instant.now().toEpochMilli(); - long took = (end - start); topicCounter.setDatabaseLatency(took); - if (took == 0) { took = 1; } long rps = recordOffsetObjectList.size() * 1000L / took; topicCounter.setRecordsPerSecond(rps); - long bps = batchBytes * 1000 / took; topicCounter.setBytesPerSecond(bps); - durationStatistics.addAndGetRecords(recordOffsetObjectList.size()); durationStatistics.addAndGetBytes(batchBytes); - topicCounter.addToTotalBytes(batchBytes); topicCounter.addToTotalRecords(recordOffsetObjectList.size()); - if (LOGGER.isDebugEnabled()) { LOGGER .debug( "Sent batch for <[{}]> with records <{}> and size <{}> KB took <{}> milliseconds. <{}> RPS. <{}> KB/s ", - table, recordOffsetObjectList.size(), batchBytes / 1024, (took), rps, bps / 1024 + topic, recordOffsetObjectList.size(), batchBytes / 1024, (took), rps, bps / 1024 ); } lastTimeCalled = Instant.now().toEpochMilli(); } - - private byte[] eventToOrigin() { - byte[] origin; - Fragment originFragment = rfc5424Frame.structuredData.getValue(originHostname); - if (!originFragment.isStub) { - origin = originFragment.toBytes(); - } - else { - origin = new byte[] {}; - } - return origin; - } - - private byte[] eventToSource() { - /*input is produced from SD element event_node_source@48577 by - concatenating "source_module:hostname:source". in case - if event_node_source@48577 is not available use event_node_relay@48577. 
- If neither are present, use null value.*/ - - sourceConcatenationBuffer.clear(); - - Fragment sourceModuleFragment = rfc5424Frame.structuredData.getValue(eventNodeSourceSourceModule); - if (sourceModuleFragment.isStub) { - sourceModuleFragment = rfc5424Frame.structuredData.getValue(eventNodeRelaySourceModule); - } - - byte[] source_module; - if (!sourceModuleFragment.isStub) { - source_module = sourceModuleFragment.toBytes(); - } - else { - source_module = new byte[] {}; - } - - Fragment sourceHostnameFragment = rfc5424Frame.structuredData.getValue(eventNodeSourceHostname); - if (sourceHostnameFragment.isStub) { - sourceHostnameFragment = rfc5424Frame.structuredData.getValue(eventNodeRelayHostname); - } - - byte[] source_hostname; - if (!sourceHostnameFragment.isStub) { - source_hostname = sourceHostnameFragment.toBytes(); - } - else { - source_hostname = new byte[] {}; - } - - Fragment sourceSourceFragment = rfc5424Frame.structuredData.getValue(eventNodeSourceSource); - if (sourceHostnameFragment.isStub) { - sourceSourceFragment = rfc5424Frame.structuredData.getValue(eventNodeRelaySource); - } - - byte[] source_source; - if (!sourceSourceFragment.isStub) { - source_source = sourceSourceFragment.toBytes(); - } - else { - source_source = new byte[] {}; - } - - sourceConcatenationBuffer.put(source_module); - sourceConcatenationBuffer.put((byte) ':'); - sourceConcatenationBuffer.put(source_hostname); - sourceConcatenationBuffer.put((byte) ':'); - sourceConcatenationBuffer.put(source_source); - - sourceConcatenationBuffer.flip(); - byte[] input = new byte[sourceConcatenationBuffer.remaining()]; - sourceConcatenationBuffer.get(input); - - return input; - } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java index c949ee81..f6b8c81e 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java +++ 
b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java @@ -45,7 +45,6 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.google.gson.JsonObject; import com.teragrep.cfe_39.Config; import org.apache.hadoop.fs.*; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -68,7 +67,7 @@ public class HDFSWrite implements AutoCloseable { private final HdfsConfiguration conf; private final String hdfsuri; - public HDFSWrite(Config config, JsonObject lastObjectJo) throws IOException { + public HDFSWrite(Config config, String topic, String partition, long offset) throws IOException { Properties readerKafkaProperties = config.getKafkaConsumerProperties(); this.useMockKafkaConsumer = Boolean @@ -81,8 +80,8 @@ public HDFSWrite(Config config, JsonObject lastObjectJo) throws IOException { /* The filepath should be something like hdfs:///opt/teragrep/cfe_39/srv/topic_name/0.12345 where 12345 is offset and 0 the partition. In other words the directory named topic_name holds files that are named and arranged based on partition and the partition's offset. Every partition has its own set of unique offset values. These values should be fetched from config and other input parameters (topic+partition+offset).*/ - path = config.getHdfsPath() + "/" + lastObjectJo.get("topic").getAsString(); - fileName = lastObjectJo.get("partition").getAsString() + "." + lastObjectJo.get("offset").getAsString(); // filename should be constructed from partition and offset. + path = config.getHdfsPath() + "/" + topic; + fileName = partition + "." + offset; // filename should be constructed from partition and offset. // ====== Init HDFS File System Object conf = new HdfsConfiguration(); @@ -107,8 +106,8 @@ These values should be fetched from config and other input parameters (topic+par // Code for initializing the class for kerberized HDFS database usage. 
hdfsuri = config.getHdfsuri(); - path = config.getHdfsPath() + "/" + lastObjectJo.get("topic").getAsString(); - fileName = lastObjectJo.get("partition").getAsString() + "." + lastObjectJo.get("offset").getAsString(); + path = config.getHdfsPath() + "/" + topic; + fileName = partition + "." + offset; // set kerberos host and realm System.setProperty("java.security.krb5.realm", config.getKerberosRealm()); @@ -174,7 +173,6 @@ public void commit(File syslogFile) { Path path = new Path(syslogFile.getPath()); fs.copyFromLocalFile(path, hdfswritepath); LOGGER.debug("End Write file into hdfs"); - boolean delete = syslogFile.delete(); // deletes the avro-file from the local disk now that it has been committed to HDFS. LOGGER.info("\nFile committed to HDFS, file writepath should be: <{}>\n", hdfswritepath); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index 305035bd..8d55a95d 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -212,7 +212,7 @@ private void createReader( Consumer group is also handled here, and each consumer of the group runs on separate thread.*/ int numOfThreads = Math.min(numOfConsumers, listPartitionInfo.size()); // Makes sure that there aren't more consumers than available partitions in the consumer group. 
for (int threadId = 1; numOfThreads >= threadId; threadId++) { - Consumer> output = new DatabaseOutput( + Consumer> output = new DatabaseOutput( config, // Configuration settings topic, // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java index a60d9d25..7f204afb 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java @@ -58,11 +58,11 @@ public class KafkaReader implements AutoCloseable { final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class); private Iterator> kafkaRecordsIterator = Collections.emptyIterator(); private final Consumer kafkaConsumer; - private final java.util.function.Consumer> callbackFunction; + private final java.util.function.Consumer> callbackFunction; public KafkaReader( Consumer kafkaConsumer, - java.util.function.Consumer> callbackFunction + java.util.function.Consumer> callbackFunction ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; @@ -79,19 +79,19 @@ public void read() { kafkaRecordsIterator = kafkaRecords.iterator(); } - List recordOffsetObjectList = new ArrayList<>(); + List recordOffsetObjectList = new ArrayList<>(); while (kafkaRecordsIterator.hasNext()) { ConsumerRecord record = kafkaRecordsIterator.next(); if (LOGGER.isDebugEnabled()) { LOGGER.debug("adding from offset: <{}>", record.offset()); } recordOffsetObjectList - .add(new RecordOffset(record.topic(), record.partition(), record.offset(), record.value())); + .add(new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value())); } if (!recordOffsetObjectList.isEmpty()) { /* This is the DatabaseOutput.accept() function. 
- Offset and other required data for HDFS storage are added to the input parameters of the accept() function which processes the consumed record.*/ + KafkaRecord and other required data for HDFS storage are added to the input parameters of the accept() function which processes the consumed record.*/ callbackFunction.accept(recordOffsetObjectList); kafkaConsumer.commitSync(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java similarity index 95% rename from src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java rename to src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java index ee0a89a9..dc167e1d 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/Offset.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java @@ -47,9 +47,7 @@ import com.teragrep.cfe_39.avro.SyslogRecord; -import java.io.IOException; - -public interface Offset { +public interface KafkaRecord { boolean isNull(); @@ -59,5 +57,5 @@ public interface Offset { String offsetToJSON(); - SyslogRecord toSyslogRecord() throws IOException; + SyslogRecord toSyslogRecord(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordConverter.java similarity index 51% rename from src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java rename to src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordConverter.java index 3f1280bd..ce7366ac 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/RecordOffset.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordConverter.java @@ -48,78 +48,96 @@ import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.rlo_06.*; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.UncheckedIOException; import java.nio.ByteBuffer; import 
java.nio.charset.StandardCharsets; import java.time.Instant; +import java.time.ZonedDateTime; -// This is the class for handling the Kafka record topic/partition/offset data that are required for HDFS storage. -public final class RecordOffset implements Offset { +public class KafkaRecordConverter { - private final String topic; - private final int partition; - private final long offset; - private final byte[] record; + private final SDVector eventNodeSourceSource; + private final SDVector eventNodeRelaySource; + private final SDVector eventNodeSourceSourceModule; + private final SDVector eventNodeRelaySourceModule; + private final SDVector eventNodeSourceHostname; + private final SDVector eventNodeRelayHostname; - public RecordOffset(String topic, int partition, long offset, byte[] record) { - this.topic = topic; - this.partition = partition; - this.offset = offset; - this.record = record; - } + private final SDVector teragrepStreamName; + private final SDVector teragrepDirectory; - @Override - public boolean isNull() { - return false; - } + // Origin + private final SDVector originHostname; - @Override - public byte[] record() { - return record; - } + private final RFC5424Frame rfc5424Frame; + + private final ByteBuffer sourceConcatenationBuffer; + + public KafkaRecordConverter() { + this.eventNodeSourceSource = new SDVector("event_node_source@48577", "source"); + this.eventNodeRelaySource = new SDVector("event_node_relay@48577", "source"); + this.eventNodeSourceSourceModule = new SDVector("event_node_source@48577", "source_module"); + this.eventNodeRelaySourceModule = new SDVector("event_node_relay@48577", "source_module"); + this.eventNodeSourceHostname = new SDVector("event_node_source@48577", "hostname"); + this.eventNodeRelayHostname = new SDVector("event_node_relay@48577", "hostname"); + + this.teragrepStreamName = new SDVector("teragrep@48577", "streamname"); + this.teragrepDirectory = new SDVector("teragrep@48577", "directory"); + + // Origin + 
this.originHostname = new SDVector("origin@48577", "hostname"); - @Override - public long size() { - return record.length; + this.rfc5424Frame = new RFC5424Frame(); + + this.sourceConcatenationBuffer = ByteBuffer.allocateDirect(256 * 1024); } - @Override - public String offsetToJSON() { - return String - .format("{\"topic\":\"%s\", \"partition\":%d, \"offset\":%d}", this.topic, this.partition, this.offset); + private long rfc3339ToEpoch(ZonedDateTime zonedDateTime) { + final Instant instant = zonedDateTime.toInstant(); + + final long MICROS_PER_SECOND = 1000L * 1000L; + final long NANOS_PER_MICROS = 1000L; + final long sec = Math.multiplyExact(instant.getEpochSecond(), MICROS_PER_SECOND); + + return Math.addExact(sec, instant.getNano() / NANOS_PER_MICROS); } - @Override - public SyslogRecord toSyslogRecord() throws ParseException, IOException { - RFC5424Frame rfc5424Frame = new RFC5424Frame(false); - InputStream inputStream = new ByteArrayInputStream(record); + public SyslogRecord convert(InputStream inputStream, String partition, long offset) { rfc5424Frame.load(inputStream); - if (rfc5424Frame.next()) { - Instant instant = new RFC5424Timestamp(rfc5424Frame.timestamp).toZonedDateTime().toInstant(); - long MICROS_PER_SECOND = 1000L * 1000L; - long NANOS_PER_MICROS = 1000L; - long sec = Math.multiplyExact(instant.getEpochSecond(), MICROS_PER_SECOND); - long epochMicros = Math.addExact(sec, instant.getNano() / NANOS_PER_MICROS); - // input - final byte[] source = eventToSource(rfc5424Frame); - // origin - final byte[] origin = eventToOrigin(rfc5424Frame); - return SyslogRecord - .newBuilder() - .setTimestamp(epochMicros) - .setPayload(rfc5424Frame.msg.toString()) - .setDirectory(rfc5424Frame.structuredData.getValue(new SDVector("teragrep@48577", "directory")).toString()).setStream(rfc5424Frame.structuredData.getValue(new SDVector("teragrep@48577", "streamname")).toString()).setHost(rfc5424Frame.hostname.toString()).setInput(new String(source, 
StandardCharsets.UTF_8)).setPartition(String.valueOf(partition)).setOffset(offset).setOrigin(new String(origin, StandardCharsets.UTF_8)).build(); + try { + rfc5424Frame.next(); } - else { - return SyslogRecord.newBuilder().build(); + catch (IOException ioException) { + throw new UncheckedIOException(ioException); } + + final long epochMicros = rfc3339ToEpoch(new RFC5424Timestamp(rfc5424Frame.timestamp).toZonedDateTime()); + + // input + final byte[] source = eventToSource(); + + // origin + final byte[] origin = eventToOrigin(); + + return SyslogRecord + .newBuilder() + .setTimestamp(epochMicros) + .setPayload(rfc5424Frame.msg.toString()) + .setDirectory(rfc5424Frame.structuredData.getValue(teragrepDirectory).toString()) + .setStream(rfc5424Frame.structuredData.getValue(teragrepStreamName).toString()) + .setHost(rfc5424Frame.hostname.toString()) + .setInput(new String(source, StandardCharsets.UTF_8)) + .setPartition(String.valueOf(partition)) + .setOffset(offset) + .setOrigin(new String(origin, StandardCharsets.UTF_8)) + .build(); } - private byte[] eventToOrigin(RFC5424Frame rfc5424Frame) { + private byte[] eventToOrigin() { byte[] origin; - Fragment originFragment = rfc5424Frame.structuredData.getValue(new SDVector("origin@48577", "hostname")); + Fragment originFragment = rfc5424Frame.structuredData.getValue(originHostname); if (!originFragment.isStub) { origin = originFragment.toBytes(); } @@ -129,20 +147,17 @@ private byte[] eventToOrigin(RFC5424Frame rfc5424Frame) { return origin; } - private byte[] eventToSource(RFC5424Frame rfc5424Frame) { - /*input is produced from SD element event_node_source@48577 by - concatenating "source_module:hostname:source". in case - if event_node_source@48577 is not available use event_node_relay@48577. - If neither are present, use null value.*/ + private byte[] eventToSource() { + //input is produced from SD element event_node_source@48577 by + // concatenating "source_module:hostname:source". 
in case + //if event_node_source@48577 is not available use event_node_relay@48577. + //If neither are present, use null value. - ByteBuffer sourceConcatenationBuffer = ByteBuffer.allocateDirect(256 * 1024); sourceConcatenationBuffer.clear(); - Fragment sourceModuleFragment = rfc5424Frame.structuredData - .getValue(new SDVector("event_node_source@48577", "source_module")); + Fragment sourceModuleFragment = rfc5424Frame.structuredData.getValue(eventNodeSourceSourceModule); if (sourceModuleFragment.isStub) { - sourceModuleFragment = rfc5424Frame.structuredData - .getValue(new SDVector("event_node_relay@48577", "source_module")); + sourceModuleFragment = rfc5424Frame.structuredData.getValue(eventNodeRelaySourceModule); } byte[] source_module; @@ -153,11 +168,9 @@ private byte[] eventToSource(RFC5424Frame rfc5424Frame) { source_module = new byte[] {}; } - Fragment sourceHostnameFragment = rfc5424Frame.structuredData - .getValue(new SDVector("event_node_source@48577", "hostname")); + Fragment sourceHostnameFragment = rfc5424Frame.structuredData.getValue(eventNodeSourceHostname); if (sourceHostnameFragment.isStub) { - sourceHostnameFragment = rfc5424Frame.structuredData - .getValue(new SDVector("event_node_relay@48577", "hostname")); + sourceHostnameFragment = rfc5424Frame.structuredData.getValue(eventNodeRelayHostname); } byte[] source_hostname; @@ -168,11 +181,9 @@ private byte[] eventToSource(RFC5424Frame rfc5424Frame) { source_hostname = new byte[] {}; } - Fragment sourceSourceFragment = rfc5424Frame.structuredData - .getValue(new SDVector("event_node_source@48577", "source")); + Fragment sourceSourceFragment = rfc5424Frame.structuredData.getValue(eventNodeSourceSource); if (sourceHostnameFragment.isStub) { - sourceSourceFragment = rfc5424Frame.structuredData - .getValue(new SDVector("event_node_relay@48577", "source")); + sourceSourceFragment = rfc5424Frame.structuredData.getValue(eventNodeRelaySource); } byte[] source_source; @@ -183,6 +194,7 @@ private byte[] 
eventToSource(RFC5424Frame rfc5424Frame) { source_source = new byte[] {}; } + // source_module:hostname:source" sourceConcatenationBuffer.put(source_module); sourceConcatenationBuffer.put((byte) ':'); sourceConcatenationBuffer.put(source_hostname); @@ -195,5 +207,4 @@ private byte[] eventToSource(RFC5424Frame rfc5424Frame) { return input; } - } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java new file mode 100644 index 00000000..61dccf0b --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java @@ -0,0 +1,96 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. 
+ * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.consumers.kafka; + +import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.rlo_06.*; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; + +// This is the class for handling the Kafka record topic/partition/offset data that are required for HDFS storage. 
+public final class KafkaRecordImpl implements KafkaRecord { + + private final String topic; + private final int partition; + private final long offset; + private final byte[] record; + + public KafkaRecordImpl(String topic, int partition, long offset, byte[] record) { + this.topic = topic; + this.partition = partition; + this.offset = offset; + this.record = record; + } + + @Override + public boolean isNull() { + return false; + } + + @Override + public byte[] record() { + return record; + } + + @Override + public long size() { + return record.length; + } + + @Override + public String offsetToJSON() { + return String + .format("{\"topic\":\"%s\", \"partition\":%d, \"offset\":%d}", this.topic, this.partition, this.offset); + } + + @Override + public SyslogRecord toSyslogRecord() { + InputStream inputStream = new ByteArrayInputStream(record); + return new KafkaRecordConverter().convert(inputStream, String.valueOf(partition), offset); + } + +} diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullOffset.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java similarity index 97% rename from src/main/java/com/teragrep/cfe_39/consumers/kafka/NullOffset.java rename to src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java index b33b719a..18f346b7 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullOffset.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java @@ -48,7 +48,7 @@ import com.teragrep.cfe_39.avro.SyslogRecord; // Null object design pattern, used to create null offset objects. 
-public final class NullOffset implements Offset { +public final class NullKafkaRecord implements KafkaRecord { @Override public boolean isNull() { diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java new file mode 100644 index 00000000..0cde0609 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java @@ -0,0 +1,109 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. 
+ * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.consumers.kafka; + +import com.teragrep.cfe_39.Config; +import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.consumers.kafka.queue.WritableQueue; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.ListIterator; + +public class PartitionFile { + + private final String topic; + private final String partition; + private final Config config; + private final WritableQueue writableQueue; + private final File syslogFile; + private final SyslogAvroWriter syslogAvroWriter; + private final List syslogRecordList; + + PartitionFile(Config config, String topic, String partition) throws IOException { + this.writableQueue = new WritableQueue(config.getQueueDirectory(), topic + partition); + this.syslogFile = writableQueue.getNextWritableFile(); + this.syslogAvroWriter = new SyslogAvroWriter(syslogFile); + this.syslogRecordList = new ArrayList<>(); + this.config = config; + this.topic = topic; + this.partition = partition; + } + + public void addRecord(SyslogRecord syslogRecord) { + syslogRecordList.add(syslogRecord); + } + + public void commitRecords() throws IOException { + ListIterator syslogRecordListIterator = syslogRecordList.listIterator(); + long storedOffset = 0; + while (syslogRecordListIterator.hasNext()) { + SyslogRecord next = syslogRecordListIterator.next(); + long syslogRecordCapacity = next.toByteBuffer().capacity(); + long syslogFileCapacity = syslogAvroWriter.fileSize(); + if 
(config.getMaximumFileSize() < (syslogFileCapacity + syslogRecordCapacity)) { + writeToHdfs(topic, partition, storedOffset); + } + syslogAvroWriter.write(next); + storedOffset = next.getOffset(); + } + // Clear the syslogRecordList from successfully committed records. + syslogRecordList.clear(); + } + + // Writes the file to hdfs and initializes new file. + public void writeToHdfs(String topic, String partition, long offset) throws IOException { + try (HDFSWrite writer = new HDFSWrite(config, topic, partition, offset)) { + writer.commit(syslogFile); // commits the final AVRO-file to HDFS. + } + // Because using SyslogAvroWriter + syslogFile.delete(); + syslogFile.createNewFile(); + } + +} diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java index 232c83bd..54fdce4d 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java @@ -60,14 +60,14 @@ public class ReadCoordinator implements Runnable { private final String queueTopic; private final Properties readerKafkaProperties; - private final Consumer> callbackFunction; + private final Consumer> callbackFunction; private boolean run = true; private final Map hdfsStartOffsets; public ReadCoordinator( String queueTopic, Properties readerKafkaProperties, - Consumer> callbackFunction, + Consumer> callbackFunction, Map hdfsStartOffsets ) { this.queueTopic = queueTopic; @@ -79,7 +79,7 @@ public ReadCoordinator( private KafkaReader createKafkaReader( Properties readerKafkaProperties, String topic, - Consumer> callbackFunction, + Consumer> callbackFunction, boolean useMockKafkaConsumer ) { diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java index 3b893d41..612e8bc5 100644 --- 
a/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java @@ -98,7 +98,7 @@ public void close() throws IOException { dataFileWriter.close(); } - public long getFileSize() throws IOException { + public long fileSize() throws IOException { return syncableFileOutputStream.getChannel().size(); } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/WritableQueue.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/WritableQueue.java index 4db4116e..7cda8169 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/WritableQueue.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/WritableQueue.java @@ -65,7 +65,7 @@ public class WritableQueue { private static final Logger LOGGER = LoggerFactory.getLogger(WritableQueue.class); private final Path queueDirectory; - private String queueNamePrefix; + private final String queueNamePrefix; public WritableQueue(String queueDirectory, String queueNamePrefix) { this.queueDirectory = Paths.get(queueDirectory); @@ -108,10 +108,6 @@ public File getNextWritableFile() throws IOException { } } - public void setQueueNamePrefix(String queueNamePrefix) { - this.queueNamePrefix = queueNamePrefix; - } - private BiPredicate getFileMatcher(String queueNamePrefix) { return (path, basicFileAttributes) -> { if (!path.getFileName().toString().startsWith(queueNamePrefix)) { diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index 315e6dac..2177e29c 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -115,7 +115,7 @@ public void hdfsWriteTest() { JsonObject recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":9}") .getAsJsonObject(); - try (HDFSWrite writer = new HDFSWrite(config, recordOffsetJo)) { + try (HDFSWrite writer = new 
HDFSWrite(config, "testConsumerTopic", "0", 9)) { writer.commit(avroFile); // commits avroFile to HDFS and deletes avroFile afterward. } Assertions.assertFalse(targetFile.toFile().exists()); @@ -133,7 +133,7 @@ public void hdfsWriteTest() { recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":13}") .getAsJsonObject(); - try (HDFSWrite writer = new HDFSWrite(config, recordOffsetJo)) { + try (HDFSWrite writer = new HDFSWrite(config, "testConsumerTopic", "0", 13)) { writer.commit(avroFile); // commits avroFile to HDFS and deletes avroFile afterward. } Assertions.assertFalse(targetFile.toFile().exists()); @@ -163,7 +163,7 @@ public void hdfsWriteExceptionTest() { JsonObject recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":9}") .getAsJsonObject(); - try (HDFSWrite writer = new HDFSWrite(config, recordOffsetJo)) { + try (HDFSWrite writer = new HDFSWrite(config, "testConsumerTopic", "0", 9)) { writer.commit(avroFile); // commits avroFile to HDFS and deletes avroFile afterward. 
} Assertions.assertFalse(targetFile.toFile().exists()); @@ -177,7 +177,7 @@ public void hdfsWriteExceptionTest() { recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":9}") .getAsJsonObject(); - HDFSWrite writer = new HDFSWrite(config, recordOffsetJo); + HDFSWrite writer = new HDFSWrite(config, "testConsumerTopic", "0", 9); File finalAvroFile = avroFile; Exception e = Assertions.assertThrows(Exception.class, () -> writer.commit(finalAvroFile)); Assertions.assertEquals("File 0.9 already exists", e.getMessage()); diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index af83fff1..143bc89d 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.consumers.kafka.ReadCoordinator; -import com.teragrep.cfe_39.consumers.kafka.RecordOffset; +import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.rlo_06.ParseException; import com.teragrep.rlo_06.RFC5424Frame; import org.apache.kafka.common.TopicPartition; @@ -73,8 +73,8 @@ public void readCoordinatorTest2Threads() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); Config config = new Config(); Map hdfsStartOffsets = new HashMap<>(); - ArrayList> messages = new ArrayList<>(); - Consumer> output = message -> messages.add(message); + ArrayList> messages = new ArrayList<>(); + Consumer> output = message -> messages.add(message); ReadCoordinator readCoordinator = new ReadCoordinator( "testConsumerTopic", @@ -148,7 +148,7 @@ public void readCoordinatorTest2Threads() { RFC5424Frame rfc5424Frame = new RFC5424Frame(false); - RecordOffset recordOffset; + KafkaRecordImpl recordOffset; Iterator iterator = messageList.iterator(); int counter = 0; @@ -527,8 +527,8 @@ 
public void readCoordinatorTest1Thread() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); Config config = new Config(); Map hdfsStartOffsets = new HashMap<>(); - ArrayList> messages = new ArrayList<>(); - Consumer> output = message -> messages.add(message); + ArrayList> messages = new ArrayList<>(); + Consumer> output = message -> messages.add(message); ReadCoordinator readCoordinator = new ReadCoordinator( "testConsumerTopic", @@ -588,7 +588,7 @@ public void readCoordinatorTest1Thread() { ); RFC5424Frame rfc5424Frame = new RFC5424Frame(false); - RecordOffset recordOffset; + KafkaRecordImpl recordOffset; Iterator iterator; List partitionList = new ArrayList(); partitionList.add(7); diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 63d83e8c..5d4f7f33 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.consumers.kafka.DatabaseOutput; -import com.teragrep.cfe_39.consumers.kafka.RecordOffset; +import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import org.apache.hadoop.fs.FileSystem; @@ -117,7 +117,7 @@ public void failNonRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new DatabaseOutput( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -132,14 +132,14 @@ public void failNonRFC5424DatabaseOutputTest() { "12>1 2022-04-25T07:34:50.806Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" 
uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" .getBytes(StandardCharsets.UTF_8) ); - RecordOffset recordOffsetObject = new RecordOffset( + KafkaRecordImpl recordOffsetObject = new KafkaRecordImpl( record.topic(), record.partition(), record.offset(), record.value() ); - List recordOffsetObjectList = new ArrayList<>(); + List recordOffsetObjectList = new ArrayList<>(); recordOffsetObjectList.add(recordOffsetObject); Exception e = Assertions.assertThrows(Exception.class, () -> output.accept(recordOffsetObjectList)); Assertions.assertEquals("com.teragrep.rlo_06.PriorityParseException: PRIORITY < missing", e.getMessage()); @@ -160,7 +160,7 @@ public void failNullRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new DatabaseOutput( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -174,18 +174,22 @@ public void failNullRFC5424DatabaseOutputTest() { "2022-04-25T07:34:50.806Z".getBytes(StandardCharsets.UTF_8), null ); - RecordOffset recordOffsetObject = new RecordOffset( + KafkaRecordImpl recordOffsetObject = new KafkaRecordImpl( record.topic(), record.partition(), record.offset(), record.value() ); - List recordOffsetObjectList = new ArrayList<>(); + List 
recordOffsetObjectList = new ArrayList<>(); recordOffsetObjectList.add(recordOffsetObject); - NullPointerException e = Assertions - .assertThrows(NullPointerException.class, () -> output.accept(recordOffsetObjectList)); - Assertions.assertEquals("Record with null content detected during processing.", e.getMessage()); + RuntimeException e = Assertions + .assertThrows(RuntimeException.class, () -> output.accept(recordOffsetObjectList)); + Assertions + .assertEquals( + "java.lang.NullPointerException: Cannot read the array length because \"buf\" is null", + e.getMessage() + ); Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"))); // No files stored to hdfs. }); diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingTest.java index 17820c7f..50a09187 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingTest.java @@ -47,7 +47,7 @@ import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.consumers.kafka.DatabaseOutput; -import com.teragrep.cfe_39.consumers.kafka.RecordOffset; +import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import org.apache.avro.file.DataFileStream; @@ -65,9 +65,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.ByteArrayInputStream; import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CopyOnWriteArrayList; @@ -110,6 +114,286 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } + @Test + public void normalRecordsTest() { + // Initialize and register duration statistics + DurationStatistics 
durationStatistics = new DurationStatistics(); + durationStatistics.register(); + + // register per topic counting + List topicCounters = new CopyOnWriteArrayList<>(); + + assertDoesNotThrow(() -> { + + Consumer> output = new DatabaseOutput( + config, // Configuration settings + "topicName", // String, the name of the topic + durationStatistics, // RuntimeStatistics object from metrics + new TopicCounter("topicName") // TopicCounter object from metrics + ); + + List recordOffsetObjectList = new ArrayList<>(); + + ConsumerRecord record = new ConsumerRecord<>( + "topicName", + 0, + 0L, + "2022-04-25T07:34:50.804Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:50.804Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"835bf792-91cf-44e3-976b-518330bb8fd3\" source=\"source\" unixtime=\"1650872090805\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [WARN] 2022-04-25 07:34:50,804 com.teragrep.jla_02.Log4j Log - Log4j warn says hi!" 
+ .getBytes(StandardCharsets.UTF_8) + ); + KafkaRecordImpl recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 1L, + "2022-04-25T07:34:50.806Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:50.806Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" 
+ .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 2L, + "2022-04-25T07:34:50.822Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:50.822Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02\"][event_id@48577 hostname=\"jla-02\" uuid=\"1848d8a1-2f08-4a1e-bec4-ff9e6dd92553\" source=\"source\" unixtime=\"1650872090822\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] 470647 [Thread-3] INFO com.teragrep.jla_02.Logback Daily - Logback-daily says hi." 
+ .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 3L, + "2022-04-25T07:34:50.822Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:50.822Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02\"][event_id@48577 hostname=\"jla-02\" uuid=\"5e1a0398-c2a0-468d-a562-c3bb31f0f853\" source=\"source\" unixtime=\"1650872090822\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] 470646 [Thread-3] INFO com.teragrep.jla_02.Logback Audit - Logback-audit says hi." 
+ .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 4L, + "2022-04-25T07:34:50.822Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:50.822Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02\"][event_id@48577 hostname=\"jla-02\" uuid=\"6268c3a2-5bda-427f-acce-29416eb445f4\" source=\"source\" unixtime=\"1650872090822\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] 470647 [Thread-3] INFO com.teragrep.jla_02.Logback Metric - Logback-metric says hi." 
+ .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 5L, + "2022-04-25T07:34:52.238Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:52.238Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"b500dcaf-1101-4000-b6b9-bfb052ddbf86\" source=\"source\" unixtime=\"1650872092238\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.238 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info audit says hi!]" + .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 6L, + "2022-04-25T07:34:52.239Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:52.239Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"05363122-51ac-4c0b-a681-f5868081f56d\" source=\"source\" unixtime=\"1650872092239\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 
hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info daily says hi!]" + .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 7L, + "2022-04-25T07:34:52.239Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:52.239Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"7bbcd843-b795-4c14-b4a1-95f5d445cbcd\" source=\"source\" unixtime=\"1650872092239\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info metric says hi!]" + 
.getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 8L, + "2022-04-25T07:34:52.240Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:52.240Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"2bc0a9f9-237d-4656-b40a-3038aace37f0\" source=\"source\" unixtime=\"1650872092240\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn audit says hi!]" + .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 9L, + "2022-04-25T07:34:52.240Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:52.240Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"ecf61e8d-e3a7-48ef-9b73-3c5a5243d2e6\" source=\"source\" unixtime=\"1650872092240\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 
hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn daily says hi!]" + .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 10L, + "2022-04-25T07:34:52.241Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:52.241Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"bf101d5a-e816-4f51-b132-97f8e3431f8e\" source=\"source\" unixtime=\"1650872092241\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.241 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn metric says hi!]" + 
.getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 11L, + "2022-04-25T07:34:52.241Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:52.241Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"ef94d9e9-3c44-4892-b5a6-bf361d13ff97\" source=\"source\" unixtime=\"1650872092241\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.241 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error audit says hi!]" + .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 12L, + "2022-04-25T07:34:52.242Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:52.242Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"5bce6e3d-767d-44b4-a044-6c4872f8f2b5\" source=\"source\" unixtime=\"1650872092242\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 
hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.242 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error daily says hi!]" + .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 13L, + "2022-04-25T07:34:52.243Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:52.243Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"3bb55ce4-0ea7-413a-b403-28b174d7ac99\" source=\"source\" unixtime=\"1650872092243\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.243 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error metric says hi!]" + 
.getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 14L, + "2022-04-25T07:34:52.244Z".getBytes(StandardCharsets.UTF_8), + null + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + record = new ConsumerRecord<>( + "topicName", + 0, + 15L, + "2022-04-25T07:34:52.245Z".getBytes(StandardCharsets.UTF_8), + "12>1 2022-04-25T07:34:52.245Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"3bb55ce4-0ea7-413a-b403-28b174d7ac99\" source=\"source\" unixtime=\"1650872092243\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.243 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error metric says hi!]" + .getBytes(StandardCharsets.UTF_8) + ); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); + recordOffsetObjectList.add(recordOffsetObject); + + output.accept(recordOffsetObjectList); + // FIXME: file is not being recognized as avro file for some reason. 
+ }); + } + @Test public void skipNonRFC5424DatabaseOutputTest() { // Initialize and register duration statistics @@ -121,7 +405,7 @@ public void skipNonRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new DatabaseOutput( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -136,14 +420,14 @@ public void skipNonRFC5424DatabaseOutputTest() { "12>1 2022-04-25T07:34:50.806Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" 
.getBytes(StandardCharsets.UTF_8) ); - RecordOffset recordOffsetObject = new RecordOffset( + KafkaRecordImpl recordOffsetObject = new KafkaRecordImpl( record.topic(), record.partition(), record.offset(), record.value() ); - List recordOffsetObjectList = new ArrayList<>(); + List recordOffsetObjectList = new ArrayList<>(); recordOffsetObjectList.add(recordOffsetObject); output.accept(recordOffsetObjectList); Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); @@ -179,7 +463,7 @@ public void skipNullRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new DatabaseOutput( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -193,14 +477,14 @@ public void skipNullRFC5424DatabaseOutputTest() { "2022-04-25T07:34:50.806Z".getBytes(StandardCharsets.UTF_8), null ); - RecordOffset recordOffsetObject = new RecordOffset( + KafkaRecordImpl recordOffsetObject = new KafkaRecordImpl( record.topic(), record.partition(), record.offset(), record.value() ); - List recordOffsetObjectList = new ArrayList<>(); + List recordOffsetObjectList = new ArrayList<>(); recordOffsetObjectList.add(recordOffsetObject); output.accept(recordOffsetObjectList); Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); @@ -236,14 +520,14 @@ public void skipNullAndNonRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new DatabaseOutput( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics new TopicCounter("topicName") // TopicCounter object from metrics ); - List recordOffsetObjectList = new ArrayList<>(); + List recordOffsetObjectList = new ArrayList<>(); ConsumerRecord record = new ConsumerRecord<>( 
"topicName", @@ -252,7 +536,7 @@ public void skipNullAndNonRFC5424DatabaseOutputTest() { "2022-04-25T07:34:50.806Z".getBytes(StandardCharsets.UTF_8), null ); - RecordOffset recordOffsetObject = new RecordOffset( + KafkaRecordImpl recordOffsetObject = new KafkaRecordImpl( record.topic(), record.partition(), record.offset(), @@ -268,7 +552,12 @@ record = new ConsumerRecord<>( "12>1 2022-04-25T07:34:50.807Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" 
.getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new RecordOffset(record.topic(), record.partition(), record.offset(), record.value()); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); recordOffsetObjectList.add(recordOffsetObject); record = new ConsumerRecord<>( "topicName", @@ -278,7 +567,12 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:50.807Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" .getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new RecordOffset(record.topic(), record.partition(), record.offset(), record.value()); + recordOffsetObject = new KafkaRecordImpl( + record.topic(), + record.partition(), + record.offset(), + record.value() + ); recordOffsetObjectList.add(recordOffsetObject); output.accept(recordOffsetObjectList); Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); From 27b7c709b245c4ebf3a7d460d42f668efd08667d Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 23 Aug 2024 13:23:57 +0300 Subject: [PATCH 04/77] Continuing refactoring DatabaseOutput.java and related classes, WIP. 
Renamed DatabaseOutput.java to BatchDistribution.java. Removed unused record() and isNull() methods from KafkaRecord interface and replaced record() usage in tests. --- ...baseOutput.java => BatchDistribution.java} | 92 ++----- .../consumers/kafka/HdfsDataIngestion.java | 2 +- .../cfe_39/consumers/kafka/KafkaReader.java | 2 +- .../cfe_39/consumers/kafka/KafkaRecord.java | 4 - .../consumers/kafka/KafkaRecordImpl.java | 17 +- .../consumers/kafka/NullKafkaRecord.java | 10 - .../cfe_39/consumers/kafka/PartitionFile.java | 72 +++-- .../consumers/kafka/SyslogAvroWriter.java | 13 +- .../teragrep/cfe_39/KafkaConsumerTest.java | 249 ++++++++---------- .../cfe_39/ProcessingFailureTest.java | 6 +- .../com/teragrep/cfe_39/ProcessingTest.java | 15 +- 11 files changed, 209 insertions(+), 273 deletions(-) rename src/main/java/com/teragrep/cfe_39/consumers/kafka/{DatabaseOutput.java => BatchDistribution.java} (59%) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java similarity index 59% rename from src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java rename to src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java index c458f956..ffb72b0c 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/DatabaseOutput.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java @@ -49,7 +49,6 @@ import com.teragrep.cfe_39.Config; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import com.teragrep.cfe_39.metrics.DurationStatistics; -import com.teragrep.rlo_06.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,20 +61,19 @@ The target where the record is stored in HDFS is based on the topic, partition and offset. ie. 
topic_name/0.123456 where offset is 123456 The mock consumer is activated for testing using the configuration file: readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")*/ -public class DatabaseOutput implements Consumer> { +public class BatchDistribution implements Consumer> { + + private static final Logger LOGGER = LoggerFactory.getLogger(BatchDistribution.class); - private static final Logger LOGGER = LoggerFactory.getLogger(DatabaseOutput.class); private final String topic; private final DurationStatistics durationStatistics; private final TopicCounter topicCounter; private long lastTimeCalled; private final Config config; - private final boolean skipNonRFC5424Records; - private final boolean skipEmptyRFC5424Records; private final Map partitionFileMap; // BatchDistribution? RecordDistribution? - public DatabaseOutput( + public BatchDistribution( Config config, String topic, DurationStatistics durationStatistics, @@ -85,20 +83,15 @@ public DatabaseOutput( this.topic = topic; this.durationStatistics = durationStatistics; this.topicCounter = topicCounter; - this.skipNonRFC5424Records = config.getSkipNonRFC5424Records(); - this.skipEmptyRFC5424Records = config.getSkipEmptyRFC5424Records(); this.partitionFileMap = new HashMap<>(); - this.lastTimeCalled = Instant.now().toEpochMilli(); } /* Input parameter is a batch of RecordOffsetObjects from kafka. Each object contains a record and its metadata (topic, partition and offset). - Each partition will get their set of exclusive AVRO-files in HDFS. - The target where the record is stored in HDFS is based on the topic, partition and last offset. ie. topic_name/0.123456 where last written record's offset is 123456. - AVRO-file with a path/name that starts with topic_name/0.X should only contain records from the 0th partition of topic named topic_name, topic_name/1.X should only contain records from 1st partition, etc. 
- AVRO-files are created dynamically, thus it is not known which record (and its offset) is written to the file last before committing it to HDFS. The final name for the HDFS file is decided only when the file is committed to HDFS.*/ + * Distributes the received kafka record batch to PartitionFile objects based on topic partition which the record originates from. + * */ @Override - public void accept(List recordOffsetObjectList) { + public void accept(List batch) { long thisTime = Instant.now().toEpochMilli(); long ftook = thisTime - lastTimeCalled; topicCounter.setKafkaLatency(ftook); @@ -106,71 +99,37 @@ public void accept(List recordOffsetObjectList) { LOGGER .debug( "Fuura searching your batch for <[{}]> with records <{}> and took <{}> milliseconds. <{}> EPS. ", - topic, recordOffsetObjectList.size(), (ftook), - (recordOffsetObjectList.size() * 1000L / ftook) + topic, batch.size(), (ftook), (batch.size() * 1000L / ftook) ); } long batchBytes = 0L; + long start = Instant.now().toEpochMilli(); + // Starts measuring performance here. Measures how long it takes to process the whole batch. - /* The recordOffsetObjectList loop will go through all the objects in the list. - The objects can serialize their contents into SyslogRecords that can be stored to an AVRO-file. - When the file size is about to go above 64M, commit the file into HDFS using the latest topic/partition/offset values as the filename and start fresh with a new empty AVRO-file. - Serialize the object that was going to make the file go above 64M into the now empty AVRO-file. - .*/ - long start = Instant.now().toEpochMilli(); // Starts measuring performance here. Measures how long it takes to process the whole recordOffsetObjectList. - ListIterator recordOffsetListIterator = recordOffsetObjectList.listIterator(); + // Distribute the records of the batch to a PartitionFile object based on partition from which the record originates from. 
+ ListIterator recordOffsetListIterator = batch.listIterator(); while (recordOffsetListIterator.hasNext()) { - // process recordOffsetObjectList here, the consumer only consumes 500 records in a single batch so the file can't be committed during a single accept(). - // Distribute the records to a PartitionFile object based on partition from which the record originates from. - - // load the next KafkaRecord KafkaRecordImpl next = recordOffsetListIterator.next(); - // Read the topic, partition and offset information of the record JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); - String topic = recordOffset.get("topic").getAsString(); - String partition = recordOffset.get("partition").getAsString(); - // Pass the record to the PartitionFile object that it belongs to. If the correct PartitionFile doesn't exist, create one. - if (!partitionFileMap.containsKey(partition)) { + // If the PartitionFile corresponding to the record's partition doesn't exist, create one. + if (!partitionFileMap.containsKey(recordOffset.get("partition").getAsString())) { try { - partitionFileMap.put(partition, new PartitionFile(config, topic, partition)); + partitionFileMap + .put(recordOffset.get("partition").getAsString(), new PartitionFile(config, recordOffset.get("topic").getAsString(), recordOffset.get("partition").getAsString())); } catch (IOException e) { + LOGGER.error("Failed to create new PartitionFile for record <{}>", recordOffset); throw new RuntimeException(e); } } // Every PartitionFile object will hold responsibility over a single unique file that is related to a single topic partition. - PartitionFile recordPartitionFile = partitionFileMap.get(partition); + PartitionFile recordPartitionFile = partitionFileMap.get(recordOffset.get("partition").getAsString()); // Tell PartitionFile to add the current record to the list of records that are going to be added to the file. Handle skipping of broken records. 
- try { - recordPartitionFile.addRecord(next.toSyslogRecord()); - } - catch (ParseException e) { - if (skipNonRFC5424Records) { - LOGGER - .warn( - "Skipping parsing a non RFC5424 record, record metadata: <{}>. Exception information: ", - recordOffset, e - ); - } - else { - throw new RuntimeException(e); - } - } - catch (NullPointerException e) { - if (skipEmptyRFC5424Records) { - LOGGER - .warn( - "Skipping parsing an empty RFC5424 record, record metadata: <{}>. Exception information: ", - recordOffset, e - ); - } - else { - throw new RuntimeException(e); - } - } + recordPartitionFile.addRecord(next); + batchBytes = batchBytes + next.size(); // metrics } - // When all records in the current batch have been distributed to different PartitionFile objects successfully, commit the adding of records to the files for all PartitionFile objects. + // When all records in the current batch have been distributed to different PartitionFile objects successfully, proceed to adding the records to the files for all PartitionFile objects. partitionFileMap.forEach((key, value) -> { try { value.commitRecords(); @@ -178,6 +137,7 @@ public void accept(List recordOffsetObjectList) { catch (IOException e) { LOGGER.error("Failed to write the SyslogRecords to PartitionFile <{}> in topic <{}>", key, topic); // FIXME: Delete the files that were stored to HDFS before the exception hit, to make sure data integrity is preserved during consumer rebalance as kafka consumer will not mark the failed record batch as committed. + // Maybe create a list of files that were stored to HDFS during the accept() call, which is then cleared at the very end of accept(). 
throw new RuntimeException(e); } }); @@ -189,19 +149,19 @@ public void accept(List recordOffsetObjectList) { if (took == 0) { took = 1; } - long rps = recordOffsetObjectList.size() * 1000L / took; + long rps = batch.size() * 1000L / took; topicCounter.setRecordsPerSecond(rps); long bps = batchBytes * 1000 / took; topicCounter.setBytesPerSecond(bps); - durationStatistics.addAndGetRecords(recordOffsetObjectList.size()); + durationStatistics.addAndGetRecords(batch.size()); durationStatistics.addAndGetBytes(batchBytes); topicCounter.addToTotalBytes(batchBytes); - topicCounter.addToTotalRecords(recordOffsetObjectList.size()); + topicCounter.addToTotalRecords(batch.size()); if (LOGGER.isDebugEnabled()) { LOGGER .debug( "Sent batch for <[{}]> with records <{}> and size <{}> KB took <{}> milliseconds. <{}> RPS. <{}> KB/s ", - topic, recordOffsetObjectList.size(), batchBytes / 1024, (took), rps, bps / 1024 + topic, batch.size(), batchBytes / 1024, (took), rps, bps / 1024 ); } lastTimeCalled = Instant.now().toEpochMilli(); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index 8d55a95d..f668ee84 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -212,7 +212,7 @@ private void createReader( Consumer group is also handled here, and each consumer of the group runs on separate thread.*/ int numOfThreads = Math.min(numOfConsumers, listPartitionInfo.size()); // Makes sure that there aren't more consumers than available partitions in the consumer group. 
for (int threadId = 1; numOfThreads >= threadId; threadId++) { - Consumer> output = new DatabaseOutput( + Consumer> output = new BatchDistribution( config, // Configuration settings topic, // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java index 7f204afb..841628c6 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java @@ -90,7 +90,7 @@ public void read() { } if (!recordOffsetObjectList.isEmpty()) { - /* This is the DatabaseOutput.accept() function. + /* This is the BatchDistribution.accept() function. KafkaRecord and other required data for HDFS storage are added to the input parameters of the accept() function which processes the consumed record.*/ callbackFunction.accept(recordOffsetObjectList); kafkaConsumer.commitSync(); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java index dc167e1d..723d93ed 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java @@ -49,10 +49,6 @@ public interface KafkaRecord { - boolean isNull(); - - byte[] record(); - long size(); String offsetToJSON(); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java index 61dccf0b..6fb36229 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java @@ -66,19 +66,14 @@ public KafkaRecordImpl(String topic, int partition, long offset, byte[] record) this.record = record; } - @Override - public boolean isNull() { - return 
false; - } - - @Override - public byte[] record() { - return record; - } - @Override public long size() { - return record.length; + if (record == null) { + return 0; + } + else { + return record.length; + } } @Override diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java index 18f346b7..20b644ae 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java @@ -50,16 +50,6 @@ // Null object design pattern, used to create null offset objects. public final class NullKafkaRecord implements KafkaRecord { - @Override - public boolean isNull() { - return true; - } - - @Override - public byte[] record() { - return new byte[0]; - } - @Override public long size() { return 0; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java index 0cde0609..dabf59f9 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java @@ -48,6 +48,9 @@ import com.teragrep.cfe_39.Config; import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.consumers.kafka.queue.WritableQueue; +import com.teragrep.rlo_06.ParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; @@ -57,53 +60,86 @@ public class PartitionFile { + private static final Logger LOGGER = LoggerFactory.getLogger(PartitionFile.class); + private final String topic; private final String partition; private final Config config; private final WritableQueue writableQueue; private final File syslogFile; - private final SyslogAvroWriter syslogAvroWriter; - private final List syslogRecordList; + private final List kafkaRecordList; PartitionFile(Config config, String topic, 
String partition) throws IOException { this.writableQueue = new WritableQueue(config.getQueueDirectory(), topic + partition); this.syslogFile = writableQueue.getNextWritableFile(); - this.syslogAvroWriter = new SyslogAvroWriter(syslogFile); - this.syslogRecordList = new ArrayList<>(); + // FIXME: Because avro writer can't delete content the file must be remade from scratch with a new SyslogAvroWriter object. + this.kafkaRecordList = new ArrayList<>(); this.config = config; this.topic = topic; this.partition = partition; } - public void addRecord(SyslogRecord syslogRecord) { - syslogRecordList.add(syslogRecord); + public void addRecord(KafkaRecordImpl kafkaRecord) { + kafkaRecordList.add(kafkaRecord); } public void commitRecords() throws IOException { - ListIterator syslogRecordListIterator = syslogRecordList.listIterator(); + ListIterator kafkaRecordListIterator = kafkaRecordList.listIterator(); long storedOffset = 0; - while (syslogRecordListIterator.hasNext()) { - SyslogRecord next = syslogRecordListIterator.next(); - long syslogRecordCapacity = next.toByteBuffer().capacity(); - long syslogFileCapacity = syslogAvroWriter.fileSize(); + while (kafkaRecordListIterator.hasNext()) { + KafkaRecordImpl next = kafkaRecordListIterator.next(); + SyslogRecord syslogRecord = null; // FIXME: NO NULLS + try { + syslogRecord = next.toSyslogRecord(); + } + catch (ParseException e) { + if (config.getSkipNonRFC5424Records()) { + LOGGER + .warn( + "Skipping parsing a non RFC5424 record, record metadata: <{}>. Exception information: ", + next.offsetToJSON(), e + ); + } + else { + LOGGER.error("Failed to parse RFC5424 record <{}>", next.offsetToJSON()); + throw new RuntimeException(e); + } + } + catch (NullPointerException e) { + if (config.getSkipEmptyRFC5424Records()) { + LOGGER + .warn( + "Skipping parsing an empty RFC5424 record, record metadata: <{}>. 
Exception information: ", + next.offsetToJSON(), e + ); + } + else { + LOGGER.error("Failed to parse RFC5424 record <{}> because of null content", next.offsetToJSON()); + throw new RuntimeException(e); + } + } + long syslogRecordCapacity = syslogRecord.toByteBuffer().capacity(); + long syslogFileCapacity = syslogFile.length(); + // When the file size is about to go above 64M, commit the file into HDFS using the latest topic/partition/offset values as the filename and start fresh with a new empty AVRO-file. if (config.getMaximumFileSize() < (syslogFileCapacity + syslogRecordCapacity)) { writeToHdfs(topic, partition, storedOffset); } - syslogAvroWriter.write(next); - storedOffset = next.getOffset(); + try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { + syslogAvroWriter.write(syslogRecord); + } + storedOffset = syslogRecord.getOffset(); } - // Clear the syslogRecordList from successfully committed records. - syslogRecordList.clear(); + // Clear the kafkaRecordList from successfully committed records. + kafkaRecordList.clear(); } // Writes the file to hdfs and initializes new file. public void writeToHdfs(String topic, String partition, long offset) throws IOException { try (HDFSWrite writer = new HDFSWrite(config, topic, partition, offset)) { + //syslogAvroWriter.close(); writer.commit(syslogFile); // commits the final AVRO-file to HDFS. } - // Because using SyslogAvroWriter - syslogFile.delete(); - syslogFile.createNewFile(); + // FIXME: Must re-initialize the avro-file as an empty file. 
} } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java index 612e8bc5..926fca77 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java @@ -61,19 +61,18 @@ public class SyslogAvroWriter implements AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(SyslogAvroWriter.class); - private final DatumWriter datumWriter = new SpecificDatumWriter<>(SyslogRecord.class); - + private final DatumWriter datumWriter; private final SyncableFileOutputStream syncableFileOutputStream; - - private final DataFileWriter dataFileWriter = new DataFileWriter<>(datumWriter); + private final DataFileWriter dataFileWriter; + private final File syslogFile; public SyslogAvroWriter(File syslogFile) throws IOException { + this.syslogFile = syslogFile; + datumWriter = new SpecificDatumWriter<>(SyslogRecord.class); + dataFileWriter = new DataFileWriter<>(datumWriter); dataFileWriter.setCodec(CodecFactory.snappyCodec()); - syncableFileOutputStream = new SyncableFileOutputStream(syslogFile); - syncableFileOutputStream.getChannel().tryLock(); - if (syslogFile.length() == 0) { // new file dataFileWriter.create(SyslogRecord.getClassSchema(), syncableFileOutputStream); diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index 143bc89d..7cf4264f 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -48,14 +48,12 @@ import com.teragrep.cfe_39.consumers.kafka.ReadCoordinator; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.rlo_06.ParseException; -import com.teragrep.rlo_06.RFC5424Frame; import org.apache.kafka.common.TopicPartition; import org.junit.jupiter.api.Assertions; import 
org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.ByteArrayInputStream; import java.util.*; import java.util.function.Consumer; @@ -146,188 +144,172 @@ public void readCoordinatorTest2Threads() { "25.04.2022 07:34:52.243 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error metric says hi!]" ); - RFC5424Frame rfc5424Frame = new RFC5424Frame(false); - - KafkaRecordImpl recordOffset; + KafkaRecordImpl kafkaRecord; Iterator iterator = messageList.iterator(); int counter = 0; for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":7, \"offset\":" + i + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); counter++; } - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":7, \"offset\":" + 14 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + Assertions.assertEquals(0, kafkaRecord.size()); counter++; - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":7, \"offset\":" + 15 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new 
ByteArrayInputStream(recordOffset.record())); - ParseException e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + KafkaRecordImpl finalKafkaRecord = kafkaRecord; + ParseException e = Assertions.assertThrows(ParseException.class, finalKafkaRecord::toSyslogRecord); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; iterator = messageList.iterator(); for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":5, \"offset\":" + i + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); + Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); counter++; } - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":5, \"offset\":" + 14 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + Assertions.assertEquals(0, kafkaRecord.size()); counter++; - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":5, \"offset\":" + 15 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + KafkaRecordImpl finalKafkaRecord1 = kafkaRecord; + e = Assertions.assertThrows(ParseException.class, finalKafkaRecord1::toSyslogRecord); 
Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; iterator = messageList.iterator(); for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":3, \"offset\":" + i + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); counter++; } - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":3, \"offset\":" + 14 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + Assertions.assertEquals(0, kafkaRecord.size()); counter++; - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":3, \"offset\":" + 15 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + KafkaRecordImpl finalKafkaRecord2 = kafkaRecord; + e = Assertions.assertThrows(ParseException.class, finalKafkaRecord2::toSyslogRecord); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; iterator = messageList.iterator(); for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", 
\"partition\":1, \"offset\":" + i + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); counter++; } - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":1, \"offset\":" + 14 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + Assertions.assertEquals(0, kafkaRecord.size()); counter++; - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":1, \"offset\":" + 15 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + KafkaRecordImpl finalKafkaRecord3 = kafkaRecord; + e = Assertions.assertThrows(ParseException.class, finalKafkaRecord3::toSyslogRecord); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; iterator = messageList.iterator(); for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":9, \"offset\":" + i + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), 
rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); counter++; } - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":9, \"offset\":" + 14 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + Assertions.assertEquals(0, kafkaRecord.size()); counter++; - recordOffset = messages.get(0).get(counter); + kafkaRecord = messages.get(0).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":9, \"offset\":" + 15 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + KafkaRecordImpl finalKafkaRecord4 = kafkaRecord; + e = Assertions.assertThrows(ParseException.class, finalKafkaRecord4::toSyslogRecord); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -336,181 +318,167 @@ public void readCoordinatorTest2Threads() { counter = 0; iterator = messageList.iterator(); for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":8, \"offset\":" + i + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); counter++; } - recordOffset = 
messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":8, \"offset\":" + 14 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + Assertions.assertEquals(0, kafkaRecord.size()); counter++; - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":8, \"offset\":" + 15 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + KafkaRecordImpl finalKafkaRecord5 = kafkaRecord; + e = Assertions.assertThrows(ParseException.class, finalKafkaRecord5::toSyslogRecord); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; iterator = messageList.iterator(); for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":6, \"offset\":" + i + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); counter++; } - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":6, \"offset\":" + 14 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + 
Assertions.assertEquals(0, kafkaRecord.size()); counter++; - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":6, \"offset\":" + 15 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + + KafkaRecordImpl finalKafkaRecord6 = kafkaRecord; + e = Assertions.assertThrows(ParseException.class, finalKafkaRecord6::toSyslogRecord); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; iterator = messageList.iterator(); for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":4, \"offset\":" + i + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); counter++; } - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":4, \"offset\":" + 14 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + Assertions.assertEquals(0, kafkaRecord.size()); counter++; - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":4, \"offset\":" + 15 + "}", - recordOffset.offsetToJSON() + 
kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + KafkaRecordImpl finalKafkaRecord7 = kafkaRecord; + e = Assertions.assertThrows(ParseException.class, finalKafkaRecord7::toSyslogRecord); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; iterator = messageList.iterator(); for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":2, \"offset\":" + i + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); counter++; } - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":2, \"offset\":" + 14 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + Assertions.assertEquals(0, kafkaRecord.size()); counter++; - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":2, \"offset\":" + 15 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + KafkaRecordImpl finalKafkaRecord8 = kafkaRecord; + e = Assertions.assertThrows(ParseException.class, 
finalKafkaRecord8::toSyslogRecord); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; iterator = messageList.iterator(); for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":" + i + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); counter++; } - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":" + 14 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + Assertions.assertEquals(0, kafkaRecord.size()); counter++; - recordOffset = messages.get(1).get(counter); + kafkaRecord = messages.get(1).get(counter); Assertions .assertEquals( "{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":" + 15 + "}", - recordOffset.offsetToJSON() + kafkaRecord.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + KafkaRecordImpl finalKafkaRecord9 = kafkaRecord; + e = Assertions.assertThrows(ParseException.class, finalKafkaRecord9::toSyslogRecord); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; @@ -587,7 +555,6 @@ public void readCoordinatorTest1Thread() { "25.04.2022 07:34:52.243 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, 
SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error metric says hi!]" ); - RFC5424Frame rfc5424Frame = new RFC5424Frame(false); KafkaRecordImpl recordOffset; Iterator iterator; List partitionList = new ArrayList(); @@ -612,11 +579,8 @@ public void readCoordinatorTest1Thread() { + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - Assertions.assertTrue(rfc5424Frame.next()); Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), rfc5424Frame.msg.toString()); - Assertions.assertFalse(rfc5424Frame.next()); + Assertions.assertEquals(iterator.next(), recordOffset.toSyslogRecord().getPayload().toString()); counter++; } @@ -627,7 +591,7 @@ public void readCoordinatorTest1Thread() { + "}", recordOffset.offsetToJSON() ); - Assertions.assertNull(recordOffset.record()); + Assertions.assertEquals(0, recordOffset.size()); counter++; recordOffset = messages.get(0).get(counter); @@ -637,8 +601,7 @@ public void readCoordinatorTest1Thread() { + "}", recordOffset.offsetToJSON() ); - rfc5424Frame.load(new ByteArrayInputStream(recordOffset.record())); - ParseException e = Assertions.assertThrows(ParseException.class, rfc5424Frame::next); + ParseException e = Assertions.assertThrows(ParseException.class, recordOffset::toSyslogRecord); Assertions.assertEquals("PRIORITY < missing", e.getMessage()); counter++; } diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 5d4f7f33..84e84e52 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.consumers.kafka.DatabaseOutput; +import com.teragrep.cfe_39.consumers.kafka.BatchDistribution; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; import 
com.teragrep.cfe_39.metrics.topic.TopicCounter; @@ -117,7 +117,7 @@ public void failNonRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new BatchDistribution( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -160,7 +160,7 @@ public void failNullRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new BatchDistribution( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingTest.java index 50a09187..06b47a2a 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.consumers.kafka.DatabaseOutput; +import com.teragrep.cfe_39.consumers.kafka.BatchDistribution; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; import com.teragrep.cfe_39.metrics.topic.TopicCounter; @@ -65,13 +65,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.ByteArrayInputStream; import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CopyOnWriteArrayList; @@ -125,7 +121,7 @@ public void normalRecordsTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new BatchDistribution( config, // Configuration settings "topicName", // String, the name of the topic 
durationStatistics, // RuntimeStatistics object from metrics @@ -390,6 +386,7 @@ record = new ConsumerRecord<>( recordOffsetObjectList.add(recordOffsetObject); output.accept(recordOffsetObjectList); + // FIXME: file is not being recognized as avro file for some reason. }); } @@ -405,7 +402,7 @@ public void skipNonRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new BatchDistribution( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -463,7 +460,7 @@ public void skipNullRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new BatchDistribution( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -520,7 +517,7 @@ public void skipNullAndNonRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new DatabaseOutput( + Consumer> output = new BatchDistribution( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics From 10454563f51c5251a97cc3c210e096786991feec Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 23 Aug 2024 16:13:48 +0300 Subject: [PATCH 05/77] Added missing boolean input parameter to SyncableFileOutputStream() to allow appending via SyslogAvroWriter. Improved exception handling in PartitionFile. 
Refactoring ProcessingTest.java --- .../cfe_39/consumers/kafka/PartitionFile.java | 23 +++-- .../consumers/kafka/SyslogAvroWriter.java | 4 +- .../com/teragrep/cfe_39/ProcessingTest.java | 85 +++++++++++++++++-- 3 files changed, 99 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java index dabf59f9..a5d256d4 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java @@ -45,6 +45,8 @@ */ package com.teragrep.cfe_39.consumers.kafka; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; import com.teragrep.cfe_39.Config; import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.consumers.kafka.queue.WritableQueue; @@ -72,7 +74,6 @@ public class PartitionFile { PartitionFile(Config config, String topic, String partition) throws IOException { this.writableQueue = new WritableQueue(config.getQueueDirectory(), topic + partition); this.syslogFile = writableQueue.getNextWritableFile(); - // FIXME: Because avro writer can't delete content the file must be remade from scratch with a new SyslogAvroWriter object. this.kafkaRecordList = new ArrayList<>(); this.config = config; this.topic = topic; @@ -88,7 +89,7 @@ public void commitRecords() throws IOException { long storedOffset = 0; while (kafkaRecordListIterator.hasNext()) { KafkaRecordImpl next = kafkaRecordListIterator.next(); - SyslogRecord syslogRecord = null; // FIXME: NO NULLS + SyslogRecord syslogRecord; try { syslogRecord = next.toSyslogRecord(); } @@ -99,6 +100,11 @@ public void commitRecords() throws IOException { "Skipping parsing a non RFC5424 record, record metadata: <{}>. 
Exception information: ", next.offsetToJSON(), e ); + JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); + if (recordOffset.get("offset").getAsLong() > storedOffset) { + storedOffset = recordOffset.get("offset").getAsLong(); + } + continue; } else { LOGGER.error("Failed to parse RFC5424 record <{}>", next.offsetToJSON()); @@ -112,6 +118,11 @@ public void commitRecords() throws IOException { "Skipping parsing an empty RFC5424 record, record metadata: <{}>. Exception information: ", next.offsetToJSON(), e ); + JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); + if (recordOffset.get("offset").getAsLong() > storedOffset) { + storedOffset = recordOffset.get("offset").getAsLong(); + } + continue; } else { LOGGER.error("Failed to parse RFC5424 record <{}> because of null content", next.offsetToJSON()); @@ -124,10 +135,13 @@ public void commitRecords() throws IOException { if (config.getMaximumFileSize() < (syslogFileCapacity + syslogRecordCapacity)) { writeToHdfs(topic, partition, storedOffset); } + // SyslogAvroWriter initialization will re-initialize the syslogFile if it has been deleted because of writeToHdfs(). try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { syslogAvroWriter.write(syslogRecord); } - storedOffset = syslogRecord.getOffset(); + if (syslogRecord.getOffset() > storedOffset) { + storedOffset = syslogRecord.getOffset(); + } } // Clear the kafkaRecordList from successfully committed records. kafkaRecordList.clear(); @@ -136,10 +150,9 @@ public void commitRecords() throws IOException { // Writes the file to hdfs and initializes new file. public void writeToHdfs(String topic, String partition, long offset) throws IOException { try (HDFSWrite writer = new HDFSWrite(config, topic, partition, offset)) { - //syslogAvroWriter.close(); writer.commit(syslogFile); // commits the final AVRO-file to HDFS. 
} - // FIXME: Must re-initialize the avro-file as an empty file. + syslogFile.delete(); // Deletes the file as all the contents have been stored to HDFS. } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java index 926fca77..6da05f6d 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java @@ -64,14 +64,12 @@ public class SyslogAvroWriter implements AutoCloseable { private final DatumWriter datumWriter; private final SyncableFileOutputStream syncableFileOutputStream; private final DataFileWriter dataFileWriter; - private final File syslogFile; public SyslogAvroWriter(File syslogFile) throws IOException { - this.syslogFile = syslogFile; datumWriter = new SpecificDatumWriter<>(SyslogRecord.class); dataFileWriter = new DataFileWriter<>(datumWriter); dataFileWriter.setCodec(CodecFactory.snappyCodec()); - syncableFileOutputStream = new SyncableFileOutputStream(syslogFile); + syncableFileOutputStream = new SyncableFileOutputStream(syslogFile, true); syncableFileOutputStream.getChannel().tryLock(); if (syslogFile.length() == 0) { // new file diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingTest.java index 06b47a2a..f06711a9 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingTest.java @@ -50,7 +50,9 @@ import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; import com.teragrep.cfe_39.metrics.topic.TopicCounter; +import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileStream; +import org.apache.avro.io.DatumReader; import org.apache.avro.specific.SpecificDatumReader; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ 
-58,10 +60,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.kafka.clients.consumer.ConsumerRecord; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -108,6 +107,11 @@ public void teardownMiniCluster() { }); hdfsCluster.shutdown(); FileUtil.fullyDelete(baseDir); + File queueDirectory = new File(config.getQueueDirectory()); + File[] files = queueDirectory.listFiles(); + if (files[0].getName().equals("topicName0.1")) { + files[0].delete(); + } } @Test @@ -387,10 +391,79 @@ record = new ConsumerRecord<>( output.accept(recordOffsetObjectList); - // FIXME: file is not being recognized as avro file for some reason. + // Assert that records 10-13 are present in local avro-file. + + File queueDirectory = new File(config.getQueueDirectory()); + File[] files = queueDirectory.listFiles(); + Assertions.assertEquals(1, files.length); + + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader dataFileReader = new DataFileReader<>(files[0], datumReader); + Assertions.assertTrue(dataFileReader.hasNext()); + SyslogRecord next = dataFileReader.next(); + Assertions.assertEquals(10, next.getOffset()); + Assertions.assertTrue(dataFileReader.hasNext()); + next = dataFileReader.next(); + Assertions.assertEquals(11, next.getOffset()); + Assertions.assertTrue(dataFileReader.hasNext()); + next = dataFileReader.next(); + Assertions.assertEquals(12, next.getOffset()); + Assertions.assertTrue(dataFileReader.hasNext()); + next = dataFileReader.next(); + Assertions.assertEquals(13, next.getOffset()); + Assertions.assertFalse(dataFileReader.hasNext()); + + // Assert that records 0-9 are present in HDFS + + Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); + 
Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.9"))); + Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.9"); + //Init input stream + FSDataInputStream inputStream = fs.open(hdfsreadpath); + //The data is in AVRO-format, so it can't be read as a string. + DataFileStream reader = new DataFileStream<>( + inputStream, + new SpecificDatumReader<>(SyslogRecord.class) + ); + SyslogRecord syslogRecord = null; + LOGGER.info("\nReading records from file {}:", hdfsreadpath); + + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(0, syslogRecord.getOffset()); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(1, syslogRecord.getOffset()); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(2, syslogRecord.getOffset()); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(3, syslogRecord.getOffset()); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(4, syslogRecord.getOffset()); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(5, syslogRecord.getOffset()); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(6, syslogRecord.getOffset()); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(7, syslogRecord.getOffset()); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(8, syslogRecord.getOffset()); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(9, syslogRecord.getOffset()); + 
Assertions.assertFalse(reader.hasNext()); + }); } + @Disabled(value = "This needs refactoring") @Test public void skipNonRFC5424DatabaseOutputTest() { // Initialize and register duration statistics @@ -449,6 +522,7 @@ public void skipNonRFC5424DatabaseOutputTest() { } + @Disabled(value = "This needs refactoring") @Test public void skipNullRFC5424DatabaseOutputTest() { // Initialize and register duration statistics @@ -506,6 +580,7 @@ public void skipNullRFC5424DatabaseOutputTest() { } + @Disabled(value = "This needs refactoring") @Test public void skipNullAndNonRFC5424DatabaseOutputTest() { // Initialize and register duration statistics From d47536d03784440ea87681b3b022d11f7b326f1a Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 26 Aug 2024 13:27:27 +0300 Subject: [PATCH 06/77] Added additional test configuration file. --- pom.xml | 1 + .../largeFile.application.properties | 46 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 src/test/resources/largeFile.application.properties diff --git a/pom.xml b/pom.xml index bda84206..86edd078 100644 --- a/pom.xml +++ b/pom.xml @@ -199,6 +199,7 @@ src/test/resources/broken.application.properties src/test/resources/valid.application.properties src/test/resources/failProcessing.application.properties + src/test/resources/largeFile.application.properties rpm/resources/config.jaas rpm/resources/log4j2.properties rpm/resources/application.properties diff --git a/src/test/resources/largeFile.application.properties b/src/test/resources/largeFile.application.properties new file mode 100644 index 00000000..b6ffbc3a --- /dev/null +++ b/src/test/resources/largeFile.application.properties @@ -0,0 +1,46 @@ +# What topics are searched from kafka, regex +queueTopicPattern=^testConsumerTopic-*$ +# Number of consumers created to the consumer groups +numOfConsumers=2 +# Kafka bootstrap servers +consumer.bootstrap.servers=test +# Offset, should not be touched +consumer.auto.offset.reset=earliest +# 
Autocommit, should not be touched +consumer.enable.auto.commit=false +# Consumer group id, this is to track the progress of reading the topic +consumer.group.id=cfe_39 +# Used security protocol and mechanism +consumer.security.protocol=SASL_PLAINTEXT +consumer.sasl.mechanism=PLAIN +# Maximum records per batch, note that too big a number will cause massive load and can cause timeouts to trigger +consumer.max.poll.records=500 +# How much data can be fetched in one go +consumer.fetch.max.bytes=1073741820 +# How long for request before timing out. Note that too big max poll records size can cause this to trigger +consumer.request.timeout.ms=300000 +consumer.max.poll.interval.ms=300000 +# For testing only, remove for prod. +consumer.useMockKafkaConsumer=true +# The maximum file size for AVRO-files that are to be stored in HDFS database. +maximumFileSize=3000000 +# Boolean for deciding if records not in RFC5424 should be skipped or not. +skipNonRFC5424Records=true +# Boolean for deciding if empty RFC5424 records should be skipped or not. +skipEmptyRFC5424Records=true +# HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 157784760000L +pruneOffset=157784760000 +# HDFS uri +hdfsuri=hdfs://localhost:45937/ +# Kerberos +java.security.krb5.kdc=test +java.security.krb5.realm=test +hadoop.security.authentication=test +hadoop.security.authorization=test +dfs.namenode.kerberos.principal.pattern=test +KerberosKeytabUser=test +KerberosKeytabPath=test +dfs.client.use.datanode.hostname=false +kerberosLoginAutorenewal=true +dfs.data.transfer.protection=test +dfs.encrypt.data.transfer.cipher.suites=test \ No newline at end of file From 82f42ce80ca36faf94e18ed2f3439a567a04da73 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 26 Aug 2024 13:36:11 +0300 Subject: [PATCH 07/77] Renamed ProcessingTest.java to BatchDistributionTest.java. 
--- .../{ProcessingTest.java => BatchDistributionTest.java} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename src/test/java/com/teragrep/cfe_39/{ProcessingTest.java => BatchDistributionTest.java} (99%) diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java similarity index 99% rename from src/test/java/com/teragrep/cfe_39/ProcessingTest.java rename to src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index f06711a9..839b703f 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -75,9 +75,9 @@ import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; // Tests for processing of consumed kafka records with skipping of broken records enabled (both null and non rfc5424). -public class ProcessingTest { +public class BatchDistributionTest { - private static final Logger LOGGER = LoggerFactory.getLogger(ProcessingTest.class); + private static final Logger LOGGER = LoggerFactory.getLogger(BatchDistributionTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; From ff29a8057cb54ab9541dd188aa0083bb3bd1376e Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 26 Aug 2024 16:33:27 +0300 Subject: [PATCH 08/77] Altered logic behind the usage of MaximumFileSize config parameter. Added preliminary version of method for writing the file managed by PartitionFile to HDFS while skipping file size checks. 
--- .../cfe_39/consumers/kafka/PartitionFile.java | 30 +++++++++++++------ .../cfe_39/BatchDistributionTest.java | 15 +++++----- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java index a5d256d4..58720df3 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java @@ -67,17 +67,18 @@ public class PartitionFile { private final String topic; private final String partition; private final Config config; - private final WritableQueue writableQueue; private final File syslogFile; private final List kafkaRecordList; + private final List batchOffsets; PartitionFile(Config config, String topic, String partition) throws IOException { - this.writableQueue = new WritableQueue(config.getQueueDirectory(), topic + partition); + WritableQueue writableQueue = new WritableQueue(config.getQueueDirectory(), topic + partition); this.syslogFile = writableQueue.getNextWritableFile(); this.kafkaRecordList = new ArrayList<>(); this.config = config; this.topic = topic; this.partition = partition; + this.batchOffsets = new ArrayList<>(); } public void addRecord(KafkaRecordImpl kafkaRecord) { @@ -129,11 +130,9 @@ public void commitRecords() throws IOException { throw new RuntimeException(e); } } - long syslogRecordCapacity = syslogRecord.toByteBuffer().capacity(); - long syslogFileCapacity = syslogFile.length(); - // When the file size is about to go above 64M, commit the file into HDFS using the latest topic/partition/offset values as the filename and start fresh with a new empty AVRO-file. 
- if (config.getMaximumFileSize() < (syslogFileCapacity + syslogRecordCapacity)) { - writeToHdfs(topic, partition, storedOffset); + // When the file size has gone above the maximum, commit the file into HDFS using the latest topic/partition/offset values as the filename and then delete the local avro-file. + if (config.getMaximumFileSize() < syslogFile.length()) { + writeToHdfs(storedOffset); } // SyslogAvroWriter initialization will re-initialize the syslogFile if it has been deleted because of writeToHdfs(). try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { @@ -145,14 +144,27 @@ public void commitRecords() throws IOException { } // Clear the kafkaRecordList from successfully committed records. kafkaRecordList.clear(); + // Store the last offset of the batch to a list. + if (storedOffset > 0) { + batchOffsets.add(storedOffset); + } + } + + public void writeToHdfsEarly() throws IOException { + if (!batchOffsets.isEmpty()) { + writeToHdfs(batchOffsets.get(batchOffsets.size()-1)); + } } // Writes the file to hdfs and initializes new file. - public void writeToHdfs(String topic, String partition, long offset) throws IOException { + private void writeToHdfs(long offset) throws IOException { try (HDFSWrite writer = new HDFSWrite(config, topic, partition, offset)) { writer.commit(syslogFile); // commits the final AVRO-file to HDFS. } - syslogFile.delete(); // Deletes the file as all the contents have been stored to HDFS. + syslogFile.delete(); // Delete the file as all the contents have been stored to HDFS. + try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { + // NoOp, syslogAvroWriter has initialized the empty AVRO-file. 
+ } } } diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 839b703f..1076cce6 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -391,7 +391,7 @@ record = new ConsumerRecord<>( output.accept(recordOffsetObjectList); - // Assert that records 10-13 are present in local avro-file. + // Assert that records 11-13 are present in local avro-file. File queueDirectory = new File(config.getQueueDirectory()); File[] files = queueDirectory.listFiles(); @@ -401,9 +401,6 @@ record = new ConsumerRecord<>( DataFileReader dataFileReader = new DataFileReader<>(files[0], datumReader); Assertions.assertTrue(dataFileReader.hasNext()); SyslogRecord next = dataFileReader.next(); - Assertions.assertEquals(10, next.getOffset()); - Assertions.assertTrue(dataFileReader.hasNext()); - next = dataFileReader.next(); Assertions.assertEquals(11, next.getOffset()); Assertions.assertTrue(dataFileReader.hasNext()); next = dataFileReader.next(); @@ -411,13 +408,12 @@ record = new ConsumerRecord<>( Assertions.assertTrue(dataFileReader.hasNext()); next = dataFileReader.next(); Assertions.assertEquals(13, next.getOffset()); - Assertions.assertFalse(dataFileReader.hasNext()); - // Assert that records 0-9 are present in HDFS + // Assert that records 0-10 are present in HDFS Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.9"))); - Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.9"); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.10"))); + Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.10"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); 
//The data is in AVRO-format, so it can't be read as a string. @@ -458,6 +454,9 @@ record = new ConsumerRecord<>( Assertions.assertTrue(reader.hasNext()); syslogRecord = reader.next(syslogRecord); Assertions.assertEquals(9, syslogRecord.getOffset()); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions.assertEquals(10, syslogRecord.getOffset()); Assertions.assertFalse(reader.hasNext()); }); From 971e0609cc4676013f82566f282ad5d45fecd87e Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 27 Aug 2024 10:25:35 +0300 Subject: [PATCH 09/77] Refactoring PartitionFile.java further. Implemented PartitionFile interface and renamed original PartitionFile.java to PartitionFileImpl.java. --- .../consumers/kafka/BatchDistribution.java | 28 +-- .../cfe_39/consumers/kafka/PartitionFile.java | 120 +----------- .../consumers/kafka/PartitionFileImpl.java | 174 ++++++++++++++++++ 3 files changed, 193 insertions(+), 129 deletions(-) create mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java index ffb72b0c..543aea07 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java @@ -70,7 +70,7 @@ public class BatchDistribution implements Consumer> { private final TopicCounter topicCounter; private long lastTimeCalled; private final Config config; - private final Map partitionFileMap; + private final Map partitionFileMap; // BatchDistribution? RecordDistribution? public BatchDistribution( @@ -88,7 +88,7 @@ public BatchDistribution( } /* Input parameter is a batch of RecordOffsetObjects from kafka. Each object contains a record and its metadata (topic, partition and offset). 
- * Distributes the received kafka record batch to PartitionFile objects based on topic partition which the record originates from. + * Distributes the received kafka record batch to PartitionFileImpl objects based on topic partition which the record originates from. * */ @Override public void accept(List batch) { @@ -106,38 +106,40 @@ public void accept(List batch) { long start = Instant.now().toEpochMilli(); // Starts measuring performance here. Measures how long it takes to process the whole batch. - // Distribute the records of the batch to a PartitionFile object based on partition from which the record originates from. + // Distribute the records of the batch to a PartitionFileImpl object based on partition from which the record originates from. ListIterator recordOffsetListIterator = batch.listIterator(); while (recordOffsetListIterator.hasNext()) { KafkaRecordImpl next = recordOffsetListIterator.next(); JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); - // If the PartitionFile corresponding to the record's partition doesn't exist, create one. + // If the PartitionFileImpl corresponding to the record's partition doesn't exist, create one. if (!partitionFileMap.containsKey(recordOffset.get("partition").getAsString())) { try { partitionFileMap - .put(recordOffset.get("partition").getAsString(), new PartitionFile(config, recordOffset.get("topic").getAsString(), recordOffset.get("partition").getAsString())); + .put(recordOffset.get("partition").getAsString(), new PartitionFileImpl(config, recordOffset)); } catch (IOException e) { - LOGGER.error("Failed to create new PartitionFile for record <{}>", recordOffset); + LOGGER.error("Failed to create new PartitionFileImpl for record <{}>", recordOffset); throw new RuntimeException(e); } } - // Every PartitionFile object will hold responsibility over a single unique file that is related to a single topic partition. 
- PartitionFile recordPartitionFile = partitionFileMap.get(recordOffset.get("partition").getAsString()); - // Tell PartitionFile to add the current record to the list of records that are going to be added to the file. Handle skipping of broken records. + // Every PartitionFileImpl object will hold responsibility over a single unique file that is related to a single topic partition. + PartitionFileImpl recordPartitionFile = partitionFileMap.get(recordOffset.get("partition").getAsString()); + // Tell PartitionFileImpl to add the current record to the list of records that are going to be added to the file. recordPartitionFile.addRecord(next); batchBytes = batchBytes + next.size(); // metrics } - // When all records in the current batch have been distributed to different PartitionFile objects successfully, proceed to adding the records to the files for all PartitionFile objects. + // When all records in the current batch have been distributed to different PartitionFileImpl objects successfully, proceed to adding the records to the files for all PartitionFileImpl objects. partitionFileMap.forEach((key, value) -> { try { value.commitRecords(); + // FIXME: Implement timeout checks for when the PartitionFileImpl object last time wrote to HDFS. + // Something like implementing lastTimeCalled on .writeToHdfs(), which is then checked during .commitRecords(). } catch (IOException e) { - LOGGER.error("Failed to write the SyslogRecords to PartitionFile <{}> in topic <{}>", key, topic); - // FIXME: Delete the files that were stored to HDFS before the exception hit, to make sure data integrity is preserved during consumer rebalance as kafka consumer will not mark the failed record batch as committed. - // Maybe create a list of files that were stored to HDFS during the accept() call, which is then cleared at the very end of accept(). 
+ LOGGER.error("Failed to write the SyslogRecords to PartitionFileImpl <{}> in topic <{}>", key, topic); + // FIXME: Handle the issue of rebalancing the kafka consumer group in case an exception is thrown after part of the batch is stored to HDFS. + // Fail fast and restart the whole cfe_39 so the kafka consumer group offsets can be fetched again from the files stored in HDFS. throw new RuntimeException(e); } }); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java index 58720df3..d5fade83 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java @@ -45,126 +45,14 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.google.gson.JsonObject; -import com.google.gson.JsonParser; -import com.teragrep.cfe_39.Config; -import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.consumers.kafka.queue.WritableQueue; -import com.teragrep.rlo_06.ParseException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.ListIterator; - -public class PartitionFile { - - private static final Logger LOGGER = LoggerFactory.getLogger(PartitionFile.class); - - private final String topic; - private final String partition; - private final Config config; - private final File syslogFile; - private final List kafkaRecordList; - private final List batchOffsets; - - PartitionFile(Config config, String topic, String partition) throws IOException { - WritableQueue writableQueue = new WritableQueue(config.getQueueDirectory(), topic + partition); - this.syslogFile = writableQueue.getNextWritableFile(); - this.kafkaRecordList = new ArrayList<>(); - this.config = config; - this.topic = topic; - this.partition = partition; - this.batchOffsets = new 
ArrayList<>(); - } - public void addRecord(KafkaRecordImpl kafkaRecord) { - kafkaRecordList.add(kafkaRecord); - } +public interface PartitionFile { - public void commitRecords() throws IOException { - ListIterator kafkaRecordListIterator = kafkaRecordList.listIterator(); - long storedOffset = 0; - while (kafkaRecordListIterator.hasNext()) { - KafkaRecordImpl next = kafkaRecordListIterator.next(); - SyslogRecord syslogRecord; - try { - syslogRecord = next.toSyslogRecord(); - } - catch (ParseException e) { - if (config.getSkipNonRFC5424Records()) { - LOGGER - .warn( - "Skipping parsing a non RFC5424 record, record metadata: <{}>. Exception information: ", - next.offsetToJSON(), e - ); - JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); - if (recordOffset.get("offset").getAsLong() > storedOffset) { - storedOffset = recordOffset.get("offset").getAsLong(); - } - continue; - } - else { - LOGGER.error("Failed to parse RFC5424 record <{}>", next.offsetToJSON()); - throw new RuntimeException(e); - } - } - catch (NullPointerException e) { - if (config.getSkipEmptyRFC5424Records()) { - LOGGER - .warn( - "Skipping parsing an empty RFC5424 record, record metadata: <{}>. Exception information: ", - next.offsetToJSON(), e - ); - JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); - if (recordOffset.get("offset").getAsLong() > storedOffset) { - storedOffset = recordOffset.get("offset").getAsLong(); - } - continue; - } - else { - LOGGER.error("Failed to parse RFC5424 record <{}> because of null content", next.offsetToJSON()); - throw new RuntimeException(e); - } - } - // When the file size has gone above the maximum, commit the file into HDFS using the latest topic/partition/offset values as the filename and then delete the local avro-file. 
- if (config.getMaximumFileSize() < syslogFile.length()) { - writeToHdfs(storedOffset); - } - // SyslogAvroWriter initialization will re-initialize the syslogFile if it has been deleted because of writeToHdfs(). - try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { - syslogAvroWriter.write(syslogRecord); - } - if (syslogRecord.getOffset() > storedOffset) { - storedOffset = syslogRecord.getOffset(); - } - } - // Clear the kafkaRecordList from successfully committed records. - kafkaRecordList.clear(); - // Store the last offset of the batch to a list. - if (storedOffset > 0) { - batchOffsets.add(storedOffset); - } - } + void addRecord(KafkaRecordImpl kafkaRecord); - public void writeToHdfsEarly() throws IOException { - if (!batchOffsets.isEmpty()) { - writeToHdfs(batchOffsets.get(batchOffsets.size()-1)); - } - } + void commitRecords() throws IOException; - // Writes the file to hdfs and initializes new file. - private void writeToHdfs(long offset) throws IOException { - try (HDFSWrite writer = new HDFSWrite(config, topic, partition, offset)) { - writer.commit(syslogFile); // commits the final AVRO-file to HDFS. - } - syslogFile.delete(); // Delete the file as all the contents have been stored to HDFS. - try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { - // NoOp, syslogAvroWriter has initialized the empty AVRO-file. 
- } - } + void writeToHdfsEarly() throws IOException; } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java new file mode 100644 index 00000000..6e35b1b4 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -0,0 +1,174 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. 
+ * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.consumers.kafka; + +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import com.teragrep.cfe_39.Config; +import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.consumers.kafka.queue.WritableQueue; +import com.teragrep.rlo_06.ParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.ListIterator; + +public class PartitionFileImpl implements PartitionFile { + + private static final Logger LOGGER = LoggerFactory.getLogger(PartitionFileImpl.class); + + JsonObject topicPartition; + private final Config config; + private final File syslogFile; + private final List kafkaRecordList; + private final List batchOffsets; + + PartitionFileImpl(Config config, JsonObject topicPartition) throws IOException { + WritableQueue writableQueue = new WritableQueue( + config.getQueueDirectory(), + topicPartition.get("topic").getAsString() + topicPartition.get("partition").getAsString() + ); + this.syslogFile = writableQueue.getNextWritableFile(); + this.kafkaRecordList = new ArrayList<>(); + this.config = config; + this.topicPartition = topicPartition; + this.batchOffsets = new ArrayList<>(); + } + + public void addRecord(KafkaRecordImpl kafkaRecord) { + kafkaRecordList.add(kafkaRecord); + } + + public void commitRecords() throws IOException { + ListIterator kafkaRecordListIterator = kafkaRecordList.listIterator(); + long 
storedOffset = 0; + while (kafkaRecordListIterator.hasNext()) { + KafkaRecordImpl next = kafkaRecordListIterator.next(); + SyslogRecord syslogRecord; + try { + syslogRecord = next.toSyslogRecord(); + } + catch (ParseException e) { + if (config.getSkipNonRFC5424Records()) { + LOGGER + .warn( + "Skipping parsing a non RFC5424 record, record metadata: <{}>. Exception information: ", + next.offsetToJSON(), e + ); + JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); + if (recordOffset.get("offset").getAsLong() > storedOffset) { + storedOffset = recordOffset.get("offset").getAsLong(); + } + continue; + } + else { + LOGGER.error("Failed to parse RFC5424 record <{}>", next.offsetToJSON()); + throw new RuntimeException(e); + } + } + catch (NullPointerException e) { + if (config.getSkipEmptyRFC5424Records()) { + LOGGER + .warn( + "Skipping parsing an empty RFC5424 record, record metadata: <{}>. Exception information: ", + next.offsetToJSON(), e + ); + JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); + if (recordOffset.get("offset").getAsLong() > storedOffset) { + storedOffset = recordOffset.get("offset").getAsLong(); + } + continue; + } + else { + LOGGER.error("Failed to parse RFC5424 record <{}> because of null content", next.offsetToJSON()); + throw new RuntimeException(e); + } + } + // When the file size has gone above the maximum, commit the file into HDFS using the latest topic/partition/offset values as the filename and then delete the local avro-file. + if (config.getMaximumFileSize() < syslogFile.length()) { + writeToHdfs(storedOffset); + } + // SyslogAvroWriter initialization will re-initialize the syslogFile if it has been deleted because of writeToHdfs(). 
+ try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { + syslogAvroWriter.write(syslogRecord); + } + if (syslogRecord.getOffset() > storedOffset) { + storedOffset = syslogRecord.getOffset(); + } + } + // Clear the kafkaRecordList from successfully committed records. + kafkaRecordList.clear(); + // Store the last offset of the batch to a list. + if (storedOffset > 0) { + batchOffsets.add(storedOffset); + } + } + + public void writeToHdfsEarly() throws IOException { + if (!batchOffsets.isEmpty()) { + writeToHdfs(batchOffsets.get(batchOffsets.size() - 1)); + } + } + + // Writes the file to hdfs and initializes new file. + private void writeToHdfs(long offset) throws IOException { + try ( + HDFSWrite writer = new HDFSWrite(config, topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), offset) + ) { + writer.commit(syslogFile); // commits the final AVRO-file to HDFS. + } + syslogFile.delete(); // Delete the file as all the contents have been stored to HDFS. + try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { + // NoOp, syslogAvroWriter has initialized the empty AVRO-file. + } + batchOffsets.clear(); + } + +} From 04f2abbd5c948c6056aa3aaa2b49370d56ece871 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 27 Aug 2024 12:01:01 +0300 Subject: [PATCH 10/77] Refactoring PartitionFile.java further. Implemented PartitionRecords interface and PartitionRecordsImpl class which generates SyslogRecords list from KafkaRecords. 
--- .../consumers/kafka/PartitionFileImpl.java | 66 ++--------- .../consumers/kafka/PartitionRecords.java | 58 +++++++++ .../consumers/kafka/PartitionRecordsImpl.java | 111 ++++++++++++++++++ 3 files changed, 179 insertions(+), 56 deletions(-) create mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecords.java create mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 6e35b1b4..4715eaae 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -46,11 +46,9 @@ package com.teragrep.cfe_39.consumers.kafka; import com.google.gson.JsonObject; -import com.google.gson.JsonParser; import com.teragrep.cfe_39.Config; import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.consumers.kafka.queue.WritableQueue; -import com.teragrep.rlo_06.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,7 +56,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.ListIterator; public class PartitionFileImpl implements PartitionFile { @@ -67,8 +64,8 @@ public class PartitionFileImpl implements PartitionFile { JsonObject topicPartition; private final Config config; private final File syslogFile; - private final List kafkaRecordList; private final List batchOffsets; + private final PartitionRecordsImpl partitionRecords; PartitionFileImpl(Config config, JsonObject topicPartition) throws IOException { WritableQueue writableQueue = new WritableQueue( @@ -76,75 +73,32 @@ public class PartitionFileImpl implements PartitionFile { topicPartition.get("topic").getAsString() + topicPartition.get("partition").getAsString() ); this.syslogFile = writableQueue.getNextWritableFile(); - 
this.kafkaRecordList = new ArrayList<>(); this.config = config; this.topicPartition = topicPartition; this.batchOffsets = new ArrayList<>(); + this.partitionRecords = new PartitionRecordsImpl(config); } public void addRecord(KafkaRecordImpl kafkaRecord) { - kafkaRecordList.add(kafkaRecord); + partitionRecords.addRecord(kafkaRecord); } public void commitRecords() throws IOException { - ListIterator kafkaRecordListIterator = kafkaRecordList.listIterator(); + List syslogRecordList = partitionRecords.toSyslogRecordList(); long storedOffset = 0; - while (kafkaRecordListIterator.hasNext()) { - KafkaRecordImpl next = kafkaRecordListIterator.next(); - SyslogRecord syslogRecord; - try { - syslogRecord = next.toSyslogRecord(); - } - catch (ParseException e) { - if (config.getSkipNonRFC5424Records()) { - LOGGER - .warn( - "Skipping parsing a non RFC5424 record, record metadata: <{}>. Exception information: ", - next.offsetToJSON(), e - ); - JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); - if (recordOffset.get("offset").getAsLong() > storedOffset) { - storedOffset = recordOffset.get("offset").getAsLong(); - } - continue; - } - else { - LOGGER.error("Failed to parse RFC5424 record <{}>", next.offsetToJSON()); - throw new RuntimeException(e); - } + for (SyslogRecord next : syslogRecordList) { + // SyslogAvroWriter initialization will re-initialize the syslogFile if it has been deleted because of writeToHdfs(). + try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { + syslogAvroWriter.write(next); } - catch (NullPointerException e) { - if (config.getSkipEmptyRFC5424Records()) { - LOGGER - .warn( - "Skipping parsing an empty RFC5424 record, record metadata: <{}>. 
Exception information: ", - next.offsetToJSON(), e - ); - JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); - if (recordOffset.get("offset").getAsLong() > storedOffset) { - storedOffset = recordOffset.get("offset").getAsLong(); - } - continue; - } - else { - LOGGER.error("Failed to parse RFC5424 record <{}> because of null content", next.offsetToJSON()); - throw new RuntimeException(e); - } + if (next.getOffset() > storedOffset) { + storedOffset = next.getOffset(); } // When the file size has gone above the maximum, commit the file into HDFS using the latest topic/partition/offset values as the filename and then delete the local avro-file. if (config.getMaximumFileSize() < syslogFile.length()) { writeToHdfs(storedOffset); } - // SyslogAvroWriter initialization will re-initialize the syslogFile if it has been deleted because of writeToHdfs(). - try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { - syslogAvroWriter.write(syslogRecord); - } - if (syslogRecord.getOffset() > storedOffset) { - storedOffset = syslogRecord.getOffset(); - } } - // Clear the kafkaRecordList from successfully committed records. - kafkaRecordList.clear(); // Store the last offset of the batch to a list. if (storedOffset > 0) { batchOffsets.add(storedOffset); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecords.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecords.java new file mode 100644 index 00000000..2b12e279 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecords.java @@ -0,0 +1,58 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39.consumers.kafka; + +import com.teragrep.cfe_39.avro.SyslogRecord; + +import java.util.List; + +public interface PartitionRecords { + + void addRecord(KafkaRecordImpl kafkaRecord); + + List toSyslogRecordList(); + +} diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java new file mode 100644 index 00000000..d0ab1653 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java @@ -0,0 +1,111 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. 
Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.consumers.kafka; + +import com.teragrep.cfe_39.Config; +import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.rlo_06.ParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class PartitionRecordsImpl implements PartitionRecords { + + private static final Logger LOGGER = LoggerFactory.getLogger(PartitionRecordsImpl.class); + + private final List kafkaRecordList; + private final Config config; + + public PartitionRecordsImpl(Config config) { + this.kafkaRecordList = new ArrayList<>(); + this.config = config; + } + + @Override + public void addRecord(KafkaRecordImpl kafkaRecord) { + this.kafkaRecordList.add(kafkaRecord); + } + + @Override + public List toSyslogRecordList() { + List syslogRecordList = new ArrayList<>(); + for (KafkaRecordImpl next : kafkaRecordList) { + try { + syslogRecordList.add(next.toSyslogRecord()); + } + catch (ParseException e) { + if (config.getSkipNonRFC5424Records()) { + LOGGER + .warn( + "Skipping parsing a non RFC5424 record, record metadata: <{}>. 
Exception information: ", + next.offsetToJSON(), e + ); + } + else { + LOGGER.error("Failed to parse RFC5424 record <{}>", next.offsetToJSON()); + throw new RuntimeException(e); + } + } + catch (NullPointerException e) { + if (config.getSkipEmptyRFC5424Records()) { + LOGGER + .warn( + "Skipping parsing an empty RFC5424 record, record metadata: <{}>. Exception information: ", + next.offsetToJSON(), e + ); + } + else { + LOGGER.error("Failed to parse RFC5424 record <{}> because of null content", next.offsetToJSON()); + throw new RuntimeException(e); + } + } + } + kafkaRecordList.clear(); + return syslogRecordList; + } +} From a2948a80ba0d5c6e24addb8df613a80a1942ef1e Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 27 Aug 2024 12:21:58 +0300 Subject: [PATCH 11/77] Added missing private final statement. --- .../com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 4715eaae..0fe2e915 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -61,7 +61,7 @@ public class PartitionFileImpl implements PartitionFile { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionFileImpl.class); - JsonObject topicPartition; + private final JsonObject topicPartition; private final Config config; private final File syslogFile; private final List batchOffsets; @@ -87,7 +87,6 @@ public void commitRecords() throws IOException { List syslogRecordList = partitionRecords.toSyslogRecordList(); long storedOffset = 0; for (SyslogRecord next : syslogRecordList) { - // SyslogAvroWriter initialization will re-initialize the syslogFile if it has been deleted because of writeToHdfs(). 
try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { syslogAvroWriter.write(next); } From d654064f80a62d5482811be683abd5f1f2f61ebd Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 27 Aug 2024 12:26:17 +0300 Subject: [PATCH 12/77] Added SyslogAvroWriterTest.java. --- .../teragrep/cfe_39/SyslogAvroWriterTest.java | 174 ++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java diff --git a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java new file mode 100644 index 00000000..adcfb20b --- /dev/null +++ b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java @@ -0,0 +1,174 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. 
+ * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39; + +import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; +import com.teragrep.cfe_39.consumers.kafka.SyslogAvroWriter; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.io.DatumReader; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +public class SyslogAvroWriterTest { + + private static Config config; + + // Prepares known state for testing. + @BeforeEach + public void startMiniCluster() { + assertDoesNotThrow(() -> { + // Set system properties to use the valid configuration. 
+ System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + config = new Config(); + }); + } + + // Teardown the minicluster + @AfterEach + public void teardownMiniCluster() { + File queueDirectory = new File(config.getQueueDirectory()); + File[] files = queueDirectory.listFiles(); + if (files[0].getName().equals("topicName0.1")) { + files[0].delete(); + } + } + + @Test + public void writeTest() { + + assertDoesNotThrow(() -> { + + File queueDirectory = new File(config.getQueueDirectory()); + + File syslogFile = new File(config.getQueueDirectory() + File.separator + "topicName0.1"); + + ConsumerRecord record0 = new ConsumerRecord<>( + "topicName", + 0, + 0L, + "2022-04-25T07:34:50.804Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:50.804Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"835bf792-91cf-44e3-976b-518330bb8fd3\" source=\"source\" unixtime=\"1650872090805\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [WARN] 2022-04-25 07:34:50,804 com.teragrep.jla_02.Log4j Log - Log4j warn says hi!" 
+ .getBytes(StandardCharsets.UTF_8) + ); + KafkaRecordImpl recordOffsetObject0 = new KafkaRecordImpl( + record0.topic(), + record0.partition(), + record0.offset(), + record0.value() + ); + + ConsumerRecord record1 = new ConsumerRecord<>( + "topicName", + 0, + 1L, + "2022-04-25T07:34:50.806Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:50.806Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" 
+ .getBytes(StandardCharsets.UTF_8) + ); + KafkaRecordImpl recordOffsetObject1 = new KafkaRecordImpl( + record1.topic(), + record1.partition(), + record1.offset(), + record1.value() + ); + + ConsumerRecord record2 = new ConsumerRecord<>( + "topicName", + 0, + 2L, + "2022-04-25T07:34:50.822Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:50.822Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02\"][event_id@48577 hostname=\"jla-02\" uuid=\"1848d8a1-2f08-4a1e-bec4-ff9e6dd92553\" source=\"source\" unixtime=\"1650872090822\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] 470647 [Thread-3] INFO com.teragrep.jla_02.Logback Daily - Logback-daily says hi." 
+ .getBytes(StandardCharsets.UTF_8) + ); + KafkaRecordImpl recordOffsetObject2 = new KafkaRecordImpl( + record2.topic(), + record2.partition(), + record2.offset(), + record2.value() + ); + + try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { + syslogAvroWriter.write(recordOffsetObject0.toSyslogRecord()); + syslogAvroWriter.write(recordOffsetObject1.toSyslogRecord()); + syslogAvroWriter.write(recordOffsetObject2.toSyslogRecord()); + } + try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { + syslogAvroWriter.write(recordOffsetObject2.toSyslogRecord()); + } + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader dataFileReader = new DataFileReader<>(syslogFile, datumReader); + Assertions.assertTrue(dataFileReader.hasNext()); + SyslogRecord next = dataFileReader.next(); + Assertions.assertEquals(0, next.getOffset()); + Assertions.assertTrue(dataFileReader.hasNext()); + next = dataFileReader.next(); + + Assertions.assertEquals(1, next.getOffset()); + Assertions.assertTrue(dataFileReader.hasNext()); + next = dataFileReader.next(); + + Assertions.assertEquals(2, next.getOffset()); + Assertions.assertTrue(dataFileReader.hasNext()); + next = dataFileReader.next(); + + Assertions.assertEquals(2, next.getOffset()); + dataFileReader.close(); + + }); + + } +} From a94ff51e7d473c66bef9a307554de8374615b345 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 27 Aug 2024 12:36:27 +0300 Subject: [PATCH 13/77] Refactored Config to be immutable. Refactored old Config setter usage to use secondary constructor instead. Added Config tests. 
--- src/main/java/com/teragrep/cfe_39/Config.java | 17 ++++++---- .../cfe_39/BatchDistributionTest.java | 1 + .../java/com/teragrep/cfe_39/ConfigTest.java | 31 +++++++++++++++++++ .../java/com/teragrep/cfe_39/HdfsTest.java | 1 + .../teragrep/cfe_39/Ingestion0FilesTest.java | 1 + .../cfe_39/Ingestion1Old1NewFileTest.java | 1 + .../cfe_39/Ingestion2NewFilesTest.java | 1 + .../cfe_39/Ingestion2OldFilesTest.java | 1 + .../cfe_39/ProcessingFailureTest.java | 1 + .../teragrep/cfe_39/PruningNoFilesTest.java | 1 + .../cfe_39/PruningOneNewFileTest.java | 1 + .../cfe_39/PruningOneOldFileTest.java | 1 + .../cfe_39/PruningOneOldOneNewFileTest.java | 1 + .../cfe_39/PruningTwoNewFilesTest.java | 1 + .../cfe_39/PruningTwoOldFilesTest.java | 1 + .../cfe_39/TestMiniClusterFactory.java | 2 -- 16 files changed, 55 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/Config.java b/src/main/java/com/teragrep/cfe_39/Config.java index c29e1ed9..bf170675 100644 --- a/src/main/java/com/teragrep/cfe_39/Config.java +++ b/src/main/java/com/teragrep/cfe_39/Config.java @@ -64,7 +64,7 @@ public class Config { private final Properties kafkaConsumerProperties; private static final Logger LOGGER = LoggerFactory.getLogger(Config.class); private final String hdfsPath; - private String hdfsuri; + private final String hdfsuri; private final String queueDirectory; private final String kerberosHost; private final String kerberosRealm; @@ -84,6 +84,10 @@ public class Config { private final String dfsEncryptDataTransferCipherSuites; public Config() throws IOException { + this(""); + } + + public Config(String hdfsuri) throws IOException { Properties properties = new Properties(); Path configPath = Paths .get(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); @@ -96,7 +100,12 @@ public Config() throws IOException { // HDFS this.hdfsPath = properties.getProperty("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); - this.hdfsuri = 
properties.getProperty("hdfsuri"); + if (hdfsuri.isEmpty() || hdfsuri == null) { + this.hdfsuri = properties.getProperty("hdfsuri"); + } + else { + this.hdfsuri = hdfsuri; + } if (this.hdfsuri == null) { throw new IllegalArgumentException("hdfsuri not set"); } @@ -206,10 +215,6 @@ public String getHdfsPath() { return hdfsPath; } - public void setHdfsuri(String input) { - this.hdfsuri = input; - } - public String getHdfsuri() { return hdfsuri; } diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 1076cce6..f4599461 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -95,6 +95,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); }); } diff --git a/src/test/java/com/teragrep/cfe_39/ConfigTest.java b/src/test/java/com/teragrep/cfe_39/ConfigTest.java index 7fc13bf0..61df970a 100644 --- a/src/test/java/com/teragrep/cfe_39/ConfigTest.java +++ b/src/test/java/com/teragrep/cfe_39/ConfigTest.java @@ -85,4 +85,35 @@ public void brokenConfigTest() { }); Assertions.assertEquals("hdfsuri not set", e.getMessage()); } + + + @Test + public void configEqualityTest() { + assertDoesNotThrow(() -> { + // Set system properties to use the valid configuration. 
+ System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + Config config1 = new Config(); + Config config2 = new Config(); + Config config3 = new Config("12345"); + Config config4 = new Config("12345"); + Assertions.assertNotEquals(config1, config2); + Assertions.assertNotEquals(config1, config3); + Assertions.assertNotEquals(config3, config4); + }); + } + + @Test + public void configConstructorTest() { + assertDoesNotThrow(() -> { + // Set system properties to use the valid configuration. + System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + Config config1 = new Config(); + Config config2 = new Config("12345"); + Assertions.assertEquals(config1.getHdfsuri(), "hdfs://localhost:45937/"); + Assertions.assertEquals(config2.getHdfsuri(), "12345"); + }); + } + } diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index 2177e29c..e722358a 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -83,6 +83,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); }); } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index cf2fe882..ebc87956 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -84,6 +84,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = 
Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); }); } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index 79174e47..7a154886 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -86,6 +86,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); // Inserts pre-made avro-files to HDFS where one file has new timestamp and other old, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index 7c8e7db8..55a6ccae 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -91,6 +91,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. 
diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index 4424918b..fecbb351 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -86,6 +86,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 84e84e52..99278e67 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -92,6 +92,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); }); } diff --git a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java index f89603d3..dc194f20 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java @@ -81,6 +81,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, 
baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); }); diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java index bcd06660..a15cf55c 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java @@ -85,6 +85,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); // Inserts a single pre-made avro-file with a new timestamp to HDFS, which is normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java index 0e7445f3..5b6c6a9e 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java @@ -85,6 +85,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); // Inserts a single pre-made avro-file with an olf timestamp to HDFS, which is normally generated during data ingestion from mock kafka consumer. 
diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java index 483e36dc..9b11f0e1 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java @@ -85,6 +85,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); /* Inserts pre-made avro-files to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java index 0f3b450c..e386ef61 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java @@ -85,6 +85,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. 
diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java index 0a5ae764..2d912752 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java @@ -85,6 +85,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); fs = new TestFileSystemFactory().create(config.getHdfsuri()); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java b/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java index e7fbfb8f..7aefb5fb 100644 --- a/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java +++ b/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java @@ -62,8 +62,6 @@ public MiniDFSCluster create(Config config, File baseDir) throws IOException { conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath()); MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf); hdfsCluster = builder.build(); - String hdfsURI = "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"; - config.setHdfsuri(hdfsURI); DistributedFileSystem fileSystem = hdfsCluster.getFileSystem(); return hdfsCluster; } From cdec71e075d558355049d139eb0ceb69bcba53b9 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 27 Aug 2024 12:45:50 +0300 Subject: [PATCH 14/77] Moved Config.java to com.teragrep.cfe_39.configuration package for further refactoring. 
--- src/main/java/com/teragrep/cfe_39/Main.java | 1 + .../java/com/teragrep/cfe_39/{ => configuration}/Config.java | 2 +- .../teragrep/cfe_39/consumers/kafka/BatchDistribution.java | 2 +- .../java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java | 2 +- .../java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java | 2 +- .../java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java | 2 +- .../teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java | 4 ++-- .../teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java | 2 +- .../teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java | 2 +- src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java | 1 + src/test/java/com/teragrep/cfe_39/ConfigTest.java | 2 +- src/test/java/com/teragrep/cfe_39/HdfsTest.java | 1 + src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java | 1 + .../java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java | 1 + src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java | 1 + src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java | 1 + src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java | 1 + src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java | 1 + src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java | 1 + src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java | 1 + src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java | 1 + .../java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java | 1 + src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java | 1 + src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java | 1 + src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java | 1 + src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java | 1 + 26 files changed, 27 insertions(+), 10 deletions(-) rename src/main/java/com/teragrep/cfe_39/{ => configuration}/Config.java (99%) diff --git a/src/main/java/com/teragrep/cfe_39/Main.java b/src/main/java/com/teragrep/cfe_39/Main.java index bb4e633c..fddd3aed 100644 --- 
a/src/main/java/com/teragrep/cfe_39/Main.java +++ b/src/main/java/com/teragrep/cfe_39/Main.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/main/java/com/teragrep/cfe_39/Config.java b/src/main/java/com/teragrep/cfe_39/configuration/Config.java similarity index 99% rename from src/main/java/com/teragrep/cfe_39/Config.java rename to src/main/java/com/teragrep/cfe_39/configuration/Config.java index bf170675..c50e9803 100644 --- a/src/main/java/com/teragrep/cfe_39/Config.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/Config.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.cfe_39; +package com.teragrep.cfe_39.configuration; import org.apache.logging.log4j.core.config.Configurator; import org.slf4j.Logger; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java index 543aea07..8a340322 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39.consumers.kafka; import com.google.gson.*; -import com.teragrep.cfe_39.Config; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import com.teragrep.cfe_39.metrics.DurationStatistics; import org.slf4j.Logger; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java index 7a3fddb6..59702a10 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java @@ 
-45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.Config; +import com.teragrep.cfe_39.configuration.Config; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java index bf1f92f6..bb6d4758 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.Config; +import com.teragrep.cfe_39.configuration.Config; import org.apache.hadoop.fs.*; import org.apache.kafka.common.TopicPartition; import org.slf4j.Logger; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java index f6b8c81e..584daf7d 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.Config; +import com.teragrep.cfe_39.configuration.Config; import org.apache.hadoop.fs.*; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index f668ee84..dd327e29 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.Config; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.metrics.*; import 
com.teragrep.cfe_39.metrics.topic.TopicCounter; import org.apache.hadoop.fs.FileSystem; @@ -210,7 +210,7 @@ private void createReader( /* Every consumer is run in a separate thread. Consumer group is also handled here, and each consumer of the group runs on separate thread.*/ - int numOfThreads = Math.min(numOfConsumers, listPartitionInfo.size()); // Makes sure that there aren't more consumers than available partitions in the consumer group. + int numOfThreads = Math.min(numOfConsumers, listPartitionInfo.size()); // FIXME: Alter the equation for calculating the number of threads. for (int threadId = 1; numOfThreads >= threadId; threadId++) { Consumer> output = new BatchDistribution( config, // Configuration settings diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 0fe2e915..952092f0 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39.consumers.kafka; import com.google.gson.JsonObject; -import com.teragrep.cfe_39.Config; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.consumers.kafka.queue.WritableQueue; import org.slf4j.Logger; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java index d0ab1653..e877bf2c 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.Config; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.rlo_06.ParseException; 
import org.slf4j.Logger; diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index f4599461..49000dc1 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -46,6 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.BatchDistribution; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; diff --git a/src/test/java/com/teragrep/cfe_39/ConfigTest.java b/src/test/java/com/teragrep/cfe_39/ConfigTest.java index 61df970a..7ebde247 100644 --- a/src/test/java/com/teragrep/cfe_39/ConfigTest.java +++ b/src/test/java/com/teragrep/cfe_39/ConfigTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -86,7 +87,6 @@ public void brokenConfigTest() { Assertions.assertEquals("hdfsuri not set", e.getMessage()); } - @Test public void configEqualityTest() { assertDoesNotThrow(() -> { diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index e722358a..773706fd 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -47,6 +47,7 @@ import com.google.gson.JsonObject; import com.google.gson.JsonParser; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HDFSWrite; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index ebc87956..81f0b7df 100644 --- 
a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -46,6 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileStream; import org.apache.avro.specific.SpecificDatumReader; diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index 7a154886..5866a526 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index 55a6ccae..0adfe4bb 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -46,6 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileStream; import org.apache.avro.specific.SpecificDatumReader; diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index fecbb351..ac179b75 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import 
com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index 7cf4264f..366a0763 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.ReadCoordinator; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.rlo_06.ParseException; diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 99278e67..fe5a08d5 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.BatchDistribution; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; diff --git a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java index dc194f20..1d026961 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java index a15cf55c..49ad6e13 
100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java index 5b6c6a9e..a9898cb6 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java index 9b11f0e1..007959c4 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java index e386ef61..df6916f4 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import 
org.apache.hadoop.fs.FileUtil; diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java index 2d912752..f5eb5c3b 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; diff --git a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java index adcfb20b..1649fa2b 100644 --- a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java +++ b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java @@ -46,6 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.consumers.kafka.SyslogAvroWriter; import org.apache.avro.file.DataFileReader; diff --git a/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java b/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java index 7aefb5fb..e03c158e 100644 --- a/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java +++ b/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.configuration.Config; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; From ef7f93a107cdb8d59c4d318ca5edf46ad399abae Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 27 Aug 2024 15:33:16 +0300 Subject: [PATCH 15/77] Added logic for handling consumer group rebalance properly. 
Refactored Config.java by replacing missed setter with secondary constructor. Beginning refactoring tests to mirror the refactoring changes. --- .../teragrep/cfe_39/configuration/Config.java | 27 ++- .../consumers/kafka/BatchDistribution.java | 6 +- .../cfe_39/consumers/kafka/KafkaReader.java | 8 +- .../consumers/kafka/PartitionFileImpl.java | 4 + .../cfe_39/BatchDistributionTest.java | 212 +++++++----------- .../java/com/teragrep/cfe_39/HdfsTest.java | 2 + .../teragrep/cfe_39/Ingestion0FilesTest.java | 9 +- .../cfe_39/Ingestion1Old1NewFileTest.java | 9 +- .../cfe_39/Ingestion2NewFilesTest.java | 9 +- .../cfe_39/Ingestion2OldFilesTest.java | 9 +- 10 files changed, 121 insertions(+), 174 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Config.java b/src/main/java/com/teragrep/cfe_39/configuration/Config.java index c50e9803..445dd0ed 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/Config.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/Config.java @@ -75,7 +75,7 @@ public class Config { private final String kerberosKeytabPath; private final String kerberosLoginAutorenewal; private final String kerberosTestMode; - private long maximumFileSize; + private final long maximumFileSize; private final int numOfConsumers; private final long pruneOffset; private final boolean skipNonRFC5424Records; @@ -84,10 +84,18 @@ public class Config { private final String dfsEncryptDataTransferCipherSuites; public Config() throws IOException { - this(""); + this("", 0); + } + + public Config(long maximumFileSize) throws IOException { + this("", maximumFileSize); } public Config(String hdfsuri) throws IOException { + this(hdfsuri, 0); + } + + public Config(String hdfsuri, long maximumFileSize) throws IOException { Properties properties = new Properties(); Path configPath = Paths .get(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); @@ -118,9 +126,14 @@ public Config(String hdfsuri) throws 
IOException { // AVRO this.queueDirectory = properties.getProperty("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - this.maximumFileSize = Long.parseLong(properties.getProperty("maximumFileSize", "60800000")); - if (this.maximumFileSize <= 0) { - throw new IllegalArgumentException("maximumFileSize must be set to >0, got " + maximumFileSize); + if (maximumFileSize > 0) { + this.maximumFileSize = maximumFileSize; + } + else { + this.maximumFileSize = Long.parseLong(properties.getProperty("maximumFileSize", "60800000")); + if (this.maximumFileSize <= 0) { + throw new IllegalArgumentException("maximumFileSize must be set to >0, got " + this.maximumFileSize); + } } // kerberos @@ -267,10 +280,6 @@ public long getMaximumFileSize() { return maximumFileSize; } - public void setMaximumFileSize(long maximumFileSize) { - this.maximumFileSize = maximumFileSize; - } - public int getNumOfConsumers() { return numOfConsumers; } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java index 8a340322..abe71372 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java @@ -72,7 +72,6 @@ public class BatchDistribution implements Consumer> { private final Config config; private final Map partitionFileMap; - // BatchDistribution? RecordDistribution? public BatchDistribution( Config config, String topic, @@ -133,13 +132,10 @@ public void accept(List batch) { partitionFileMap.forEach((key, value) -> { try { value.commitRecords(); - // FIXME: Implement timeout checks for when the PartitionFileImpl object last time wrote to HDFS. - // Something like implementing lastTimeCalled on .writeToHdfs(), which is then checked during .commitRecords(). 
} catch (IOException e) { LOGGER.error("Failed to write the SyslogRecords to PartitionFileImpl <{}> in topic <{}>", key, topic); - // FIXME: Handle the issue of rebalancing the kafka consumer group in case an exception is thrown after part of the batch is stored to HDFS. - // Fail fast and restart the whole cfe_39 so the kafka consumer group offsets can be fetched again from the files stored in HDFS. + // FIXME: Fail fast and restart the whole cfe_39 so the kafka consumer group offsets can be fetched again from the files stored in HDFS. throw new RuntimeException(e); } }); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java index 841628c6..227af98c 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java @@ -55,8 +55,7 @@ public class KafkaReader implements AutoCloseable { - final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class); - private Iterator> kafkaRecordsIterator = Collections.emptyIterator(); + private final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class); private final Consumer kafkaConsumer; private final java.util.function.Consumer> callbackFunction; @@ -69,7 +68,7 @@ public KafkaReader( } public void read() { - long offset; + Iterator> kafkaRecordsIterator = Collections.emptyIterator(); if (!kafkaRecordsIterator.hasNext()) { // still need to consume more, infinitely loop because connection problems may cause return of an empty iterator ConsumerRecords kafkaRecords = kafkaConsumer.poll(Duration.ofSeconds(60)); @@ -95,6 +94,9 @@ KafkaRecord and other required data for HDFS storage are added to the input para callbackFunction.accept(recordOffsetObjectList); kafkaConsumer.commitSync(); } + else { + // FIXME: If no new kafka record batches is received for a while, a consumer rebalance may have happened. 
To resolve use callbackFunction.accept() with empty recordOffsetObjectList. + } } @Override diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 952092f0..8eeac38f 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -102,6 +102,10 @@ public void commitRecords() throws IOException { if (storedOffset > 0) { batchOffsets.add(storedOffset); } + // No records mean consumer group rebalance happened, write file to HDFS. + if (syslogRecordList.isEmpty()) { + writeToHdfsEarly(); + } } public void writeToHdfsEarly() throws IOException { diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 49000dc1..fd464f85 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -134,7 +134,7 @@ public void normalRecordsTest() { new TopicCounter("topicName") // TopicCounter object from metrics ); - List recordOffsetObjectList = new ArrayList<>(); + List kafkaRecordList = new ArrayList<>(); ConsumerRecord record = new ConsumerRecord<>( "topicName", @@ -144,13 +144,13 @@ public void normalRecordsTest() { "<12>1 2022-04-25T07:34:50.804Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"835bf792-91cf-44e3-976b-518330bb8fd3\" source=\"source\" unixtime=\"1650872090805\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 
source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [WARN] 2022-04-25 07:34:50,804 com.teragrep.jla_02.Log4j Log - Log4j warn says hi!" .getBytes(StandardCharsets.UTF_8) ); - KafkaRecordImpl recordOffsetObject = new KafkaRecordImpl( + KafkaRecordImpl kafkaRecord = new KafkaRecordImpl( record.topic(), record.partition(), record.offset(), record.value() ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -160,13 +160,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:50.806Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" 
.getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -176,13 +171,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:50.822Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02\"][event_id@48577 hostname=\"jla-02\" uuid=\"1848d8a1-2f08-4a1e-bec4-ff9e6dd92553\" source=\"source\" unixtime=\"1650872090822\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] 470647 [Thread-3] INFO com.teragrep.jla_02.Logback Daily - Logback-daily says hi." 
.getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -192,13 +182,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:50.822Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02\"][event_id@48577 hostname=\"jla-02\" uuid=\"5e1a0398-c2a0-468d-a562-c3bb31f0f853\" source=\"source\" unixtime=\"1650872090822\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] 470646 [Thread-3] INFO com.teragrep.jla_02.Logback Audit - Logback-audit says hi." 
.getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -208,13 +193,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:50.822Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02\"][event_id@48577 hostname=\"jla-02\" uuid=\"6268c3a2-5bda-427f-acce-29416eb445f4\" source=\"source\" unixtime=\"1650872090822\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] 470647 [Thread-3] INFO com.teragrep.jla_02.Logback Metric - Logback-metric says hi." 
.getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -224,13 +204,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:52.238Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"b500dcaf-1101-4000-b6b9-bfb052ddbf86\" source=\"source\" unixtime=\"1650872092238\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.238 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info audit says hi!]" .getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -240,13 +215,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:52.239Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 
hostname=\"jla-02.default\" uuid=\"05363122-51ac-4c0b-a681-f5868081f56d\" source=\"source\" unixtime=\"1650872092239\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info daily says hi!]" .getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -256,13 +226,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:52.239Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"7bbcd843-b795-4c14-b4a1-95f5d445cbcd\" source=\"source\" unixtime=\"1650872092239\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 
streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info metric says hi!]" .getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -272,13 +237,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:52.240Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"2bc0a9f9-237d-4656-b40a-3038aace37f0\" source=\"source\" unixtime=\"1650872092240\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn audit says hi!]" .getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), 
record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -288,13 +248,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:52.240Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"ecf61e8d-e3a7-48ef-9b73-3c5a5243d2e6\" source=\"source\" unixtime=\"1650872092240\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn daily says hi!]" .getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -304,13 +259,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:52.241Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"bf101d5a-e816-4f51-b132-97f8e3431f8e\" source=\"source\" unixtime=\"1650872092241\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" 
source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.241 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn metric says hi!]" .getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -320,13 +270,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:52.241Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"ef94d9e9-3c44-4892-b5a6-bf361d13ff97\" source=\"source\" unixtime=\"1650872092241\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.241 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error audit says hi!]" 
.getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -336,13 +281,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:52.242Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"5bce6e3d-767d-44b4-a044-6c4872f8f2b5\" source=\"source\" unixtime=\"1650872092242\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.242 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error daily says hi!]" .getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -352,13 +292,8 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:52.243Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 
hostname=\"jla-02.default\" uuid=\"3bb55ce4-0ea7-413a-b403-28b174d7ac99\" source=\"source\" unixtime=\"1650872092243\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.243 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error metric says hi!]" .getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -367,13 +302,8 @@ record = new ConsumerRecord<>( "2022-04-25T07:34:52.244Z".getBytes(StandardCharsets.UTF_8), null ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -383,15 +313,10 @@ record = new ConsumerRecord<>( "12>1 2022-04-25T07:34:52.245Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"3bb55ce4-0ea7-413a-b403-28b174d7ac99\" source=\"source\" 
unixtime=\"1650872092243\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872092\"] 25.04.2022 07:34:52.243 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error metric says hi!]" .getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); - output.accept(recordOffsetObjectList); + output.accept(kafkaRecordList); // Assert that records 11-13 are present in local avro-file. @@ -461,6 +386,33 @@ record = new ConsumerRecord<>( Assertions.assertEquals(10, syslogRecord.getOffset()); Assertions.assertFalse(reader.hasNext()); + // Use empty batch to flush the local files to HDFS. + + List kafkaRecordListEmpty = new ArrayList<>(); + output.accept(kafkaRecordListEmpty); + Assertions.assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.13"))); + hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.13"); + //Init input stream + FSDataInputStream inputStream2 = fs.open(hdfsreadpath); + //The data is in AVRO-format, so it can't be read as a string. 
+ DataFileStream reader2 = new DataFileStream<>( + inputStream2, + new SpecificDatumReader<>(SyslogRecord.class) + ); + SyslogRecord syslogRecord2 = null; + LOGGER.info("\nReading records from file {}:", hdfsreadpath); + + Assertions.assertTrue(reader2.hasNext()); + syslogRecord2 = reader2.next(syslogRecord2); + Assertions.assertEquals(11, syslogRecord2.getOffset()); + Assertions.assertTrue(reader2.hasNext()); + syslogRecord2 = reader2.next(syslogRecord2); + Assertions.assertEquals(12, syslogRecord2.getOffset()); + Assertions.assertTrue(reader2.hasNext()); + syslogRecord2 = reader2.next(syslogRecord2); + Assertions.assertEquals(13, syslogRecord2.getOffset()); + Assertions.assertFalse(reader2.hasNext()); }); } @@ -491,16 +443,16 @@ public void skipNonRFC5424DatabaseOutputTest() { "12>1 2022-04-25T07:34:50.806Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" 
.getBytes(StandardCharsets.UTF_8) ); - KafkaRecordImpl recordOffsetObject = new KafkaRecordImpl( + KafkaRecordImpl kafkaRecord = new KafkaRecordImpl( record.topic(), record.partition(), record.offset(), record.value() ); - List recordOffsetObjectList = new ArrayList<>(); - recordOffsetObjectList.add(recordOffsetObject); - output.accept(recordOffsetObjectList); + List kafkaRecordList = new ArrayList<>(); + kafkaRecordList.add(kafkaRecord); + output.accept(kafkaRecordList); Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"))); // File in hdfs does not contain any records, but acts as a marker for kafka consumer offsets. @@ -549,16 +501,16 @@ public void skipNullRFC5424DatabaseOutputTest() { "2022-04-25T07:34:50.806Z".getBytes(StandardCharsets.UTF_8), null ); - KafkaRecordImpl recordOffsetObject = new KafkaRecordImpl( + KafkaRecordImpl kafkaRecord = new KafkaRecordImpl( record.topic(), record.partition(), record.offset(), record.value() ); - List recordOffsetObjectList = new ArrayList<>(); - recordOffsetObjectList.add(recordOffsetObject); - output.accept(recordOffsetObjectList); + List kafkaRecordList = new ArrayList<>(); + kafkaRecordList.add(kafkaRecord); + output.accept(kafkaRecordList); Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"))); // File in hdfs does not contain any records, but acts as a marker for kafka consumer offsets. 
@@ -600,7 +552,7 @@ public void skipNullAndNonRFC5424DatabaseOutputTest() { new TopicCounter("topicName") // TopicCounter object from metrics ); - List recordOffsetObjectList = new ArrayList<>(); + List kafkaRecordList = new ArrayList<>(); ConsumerRecord record = new ConsumerRecord<>( "topicName", @@ -609,13 +561,13 @@ public void skipNullAndNonRFC5424DatabaseOutputTest() { "2022-04-25T07:34:50.806Z".getBytes(StandardCharsets.UTF_8), null ); - KafkaRecordImpl recordOffsetObject = new KafkaRecordImpl( + KafkaRecordImpl kafkaRecord = new KafkaRecordImpl( record.topic(), record.partition(), record.offset(), record.value() ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", @@ -625,13 +577,8 @@ record = new ConsumerRecord<>( "12>1 2022-04-25T07:34:50.807Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" 
.getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); record = new ConsumerRecord<>( "topicName", 0, @@ -640,14 +587,9 @@ record = new ConsumerRecord<>( "<12>1 2022-04-25T07:34:50.807Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" 
.getBytes(StandardCharsets.UTF_8) ); - recordOffsetObject = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() - ); - recordOffsetObjectList.add(recordOffsetObject); - output.accept(recordOffsetObjectList); + kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); + kafkaRecordList.add(kafkaRecord); + output.accept(kafkaRecordList); Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.3"))); diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index 773706fd..c67626e8 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -99,6 +99,7 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } + @Disabled(value = "This needs refactoring") @Test public void hdfsWriteTest() { // This test case is for testing the functionality of the HDFSWrite.java by writing pre-generated AVRO-files to the HDFS database and asserting the results are correct. @@ -146,6 +147,7 @@ public void hdfsWriteTest() { }); } + @Disabled(value = "This needs refactoring") @Test public void hdfsWriteExceptionTest() { // This test case is for testing the functionality of the HDFSWrite.java exception handling by trying to write the same file twice and asserting that the proper exception is thrown. 
diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index 81f0b7df..54c22d67 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -85,7 +85,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 3000); fs = new TestFileSystemFactory().create(config.getHdfsuri()); }); } @@ -100,6 +100,7 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } + @Disabled(value = "This needs refactoring") @DisabledIfSystemProperty( named = "skipIngestionTest", matches = "true" @@ -108,10 +109,9 @@ public void teardownMiniCluster() { public void ingestion0FilesTest() { /*This test case is for testing the functionality of the ingestion when there are no files already present in the database before starting ingestion. Maximum file size is set to 30,000 in the config. - Empty HDFS database, 140 records in mock kafka consumer ready for ingestion. All 14 records for each 10 topic partitions are stored in a single avro-file per partition.*/ + Empty HDFS database, 160 records in mock kafka consumer ready for ingestion. All 16 records for each 10 topic partitions are stored in a single avro-file per partition.*/ assertDoesNotThrow(() -> { Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. - config.setMaximumFileSize(30000); // This parameter defines the amount of records that can fit inside a single AVRO-file. 
Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); Thread.sleep(10000); @@ -303,6 +303,7 @@ record = reader.next(record); }); } + @Disabled(value = "This needs refactoring") @DisabledIfSystemProperty( named = "skipIngestionTest", matches = "true" @@ -314,7 +315,7 @@ public void ingestion0FilesLowSizeTest() { Empty HDFS database, 140 records in mock kafka consumer ready for ingestion. All 14 records for each 10 topic partitions are stored in two avro-files per partition based on MaximumFileSize.*/ assertDoesNotThrow(() -> { Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. - config.setMaximumFileSize(3000); // This parameter defines the amount of records that can fit inside a single AVRO-file. + Config config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 3000); Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); Thread.sleep(10000); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index 5866a526..da56983b 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -51,10 +51,7 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.junit.jupiter.api.condition.DisabledIfSystemProperty; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -87,7 +84,7 @@ public void startMiniCluster() { // Create a HDFS 
miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 30000); fs = new TestFileSystemFactory().create(config.getHdfsuri()); // Inserts pre-made avro-files to HDFS where one file has new timestamp and other old, which are normally generated during data ingestion from mock kafka consumer. @@ -132,6 +129,7 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } + @Disabled(value = "This needs refactoring") @DisabledIfSystemProperty( named = "skipIngestionTest", matches = "true" @@ -153,7 +151,6 @@ public void ingestion1Old1NewFileTest() { Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. 
Assertions.assertTrue((System.currentTimeMillis() - config.getPruneOffset()) > 157784760000L); - config.setMaximumFileSize(30000); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); Thread.sleep(10000); hdfsDataIngestion.run(); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index 0adfe4bb..7de3207d 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -53,10 +53,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.junit.jupiter.api.condition.DisabledIfSystemProperty; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -92,7 +89,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 30000); fs = new TestFileSystemFactory().create(config.getHdfsuri()); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. 
@@ -135,6 +132,7 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } + @Disabled(value = "This needs refactoring") @DisabledIfSystemProperty( named = "skipIngestionTest", matches = "true" @@ -153,7 +151,6 @@ public void ingestion2NewFilesTest() { Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. - config.setMaximumFileSize(30000); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); Thread.sleep(10000); hdfsDataIngestion.run(); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index ac179b75..b059b9de 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -51,10 +51,7 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.junit.jupiter.api.condition.DisabledIfSystemProperty; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -87,7 +84,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 30000); fs = new TestFileSystemFactory().create(config.getHdfsuri()); // Inserts pre-made avro-files with old 
timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. @@ -133,6 +130,7 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } + @Disabled(value = "This needs refactoring") @DisabledIfSystemProperty( named = "skipIngestionTest", matches = "true" @@ -153,7 +151,6 @@ public void ingestion2OldFilesTest() { Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. Assertions.assertTrue((System.currentTimeMillis() - config.getPruneOffset()) > 157784760000L); - config.setMaximumFileSize(30000); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); Thread.sleep(10000); hdfsDataIngestion.run(); From f1e1ea1a951a4ad65069556c50f83e6d6892f6e9 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 28 Aug 2024 13:29:34 +0300 Subject: [PATCH 16/77] Implemented BatchDistribution interface as a subtype of Consumer interface, renamed BatchDistribution class to BatchDistributionImpl. Implemented IngestionRebalanceListener for handling kafka consumer group rebalance. 
--- .../consumers/kafka/BatchDistribution.java | 119 +----------- .../kafka/BatchDistributionImpl.java | 173 ++++++++++++++++++ .../consumers/kafka/HdfsDataIngestion.java | 3 +- .../kafka/IngestionRebalanceListener.java | 74 ++++++++ .../cfe_39/consumers/kafka/KafkaReader.java | 20 +- .../consumers/kafka/ReadCoordinator.java | 7 +- .../cfe_39/BatchDistributionTest.java | 10 +- .../teragrep/cfe_39/KafkaConsumerTest.java | 18 +- .../cfe_39/ProcessingFailureTest.java | 6 +- 9 files changed, 288 insertions(+), 142 deletions(-) create mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java create mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/IngestionRebalanceListener.java diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java index abe71372..73365eaf 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java @@ -45,123 +45,10 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.google.gson.*; -import com.teragrep.cfe_39.configuration.Config; -import com.teragrep.cfe_39.metrics.topic.TopicCounter; -import com.teragrep.cfe_39.metrics.DurationStatistics; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.*; -import java.time.Instant; -import java.util.*; +import java.util.List; import java.util.function.Consumer; -/* The kafka stream should first be deserialized using rlo_06 and then serialized again using avro and stored in HDFS. - The target where the record is stored in HDFS is based on the topic, partition and offset. ie. 
topic_name/0.123456 where offset is 123456 - The mock consumer is activated for testing using the configuration file: readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")*/ - -public class BatchDistribution implements Consumer> { - - private static final Logger LOGGER = LoggerFactory.getLogger(BatchDistribution.class); - - private final String topic; - private final DurationStatistics durationStatistics; - private final TopicCounter topicCounter; - private long lastTimeCalled; - private final Config config; - private final Map partitionFileMap; - - public BatchDistribution( - Config config, - String topic, - DurationStatistics durationStatistics, - TopicCounter topicCounter - ) { - this.config = config; - this.topic = topic; - this.durationStatistics = durationStatistics; - this.topicCounter = topicCounter; - this.partitionFileMap = new HashMap<>(); - this.lastTimeCalled = Instant.now().toEpochMilli(); - } - - /* Input parameter is a batch of RecordOffsetObjects from kafka. Each object contains a record and its metadata (topic, partition and offset). - * Distributes the received kafka record batch to PartitionFileImpl objects based on topic partition which the record originates from. - * */ - @Override - public void accept(List batch) { - long thisTime = Instant.now().toEpochMilli(); - long ftook = thisTime - lastTimeCalled; - topicCounter.setKafkaLatency(ftook); - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug( - "Fuura searching your batch for <[{}]> with records <{}> and took <{}> milliseconds. <{}> EPS. ", - topic, batch.size(), (ftook), (batch.size() * 1000L / ftook) - ); - } - long batchBytes = 0L; - long start = Instant.now().toEpochMilli(); - // Starts measuring performance here. Measures how long it takes to process the whole batch. - - // Distribute the records of the batch to a PartitionFileImpl object based on partition from which the record originates from. 
- ListIterator recordOffsetListIterator = batch.listIterator(); - while (recordOffsetListIterator.hasNext()) { - KafkaRecordImpl next = recordOffsetListIterator.next(); - JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); - // If the PartitionFileImpl corresponding to the record's partition doesn't exist, create one. - if (!partitionFileMap.containsKey(recordOffset.get("partition").getAsString())) { - try { - partitionFileMap - .put(recordOffset.get("partition").getAsString(), new PartitionFileImpl(config, recordOffset)); - } - catch (IOException e) { - LOGGER.error("Failed to create new PartitionFileImpl for record <{}>", recordOffset); - throw new RuntimeException(e); - } - } - // Every PartitionFileImpl object will hold responsibility over a single unique file that is related to a single topic partition. - PartitionFileImpl recordPartitionFile = partitionFileMap.get(recordOffset.get("partition").getAsString()); - // Tell PartitionFileImpl to add the current record to the list of records that are going to be added to the file. - recordPartitionFile.addRecord(next); - batchBytes = batchBytes + next.size(); // metrics - } - - // When all records in the current batch have been distributed to different PartitionFileImpl objects successfully, proceed to adding the records to the files for all PartitionFileImpl objects. - partitionFileMap.forEach((key, value) -> { - try { - value.commitRecords(); - } - catch (IOException e) { - LOGGER.error("Failed to write the SyslogRecords to PartitionFileImpl <{}> in topic <{}>", key, topic); - // FIXME: Fail fast and restart the whole cfe_39 so the kafka consumer group offsets can be fetched again from the files stored in HDFS. - throw new RuntimeException(e); - } - }); +public interface BatchDistribution extends Consumer> { - // Measures performance of code that is between start and end. 
- long end = Instant.now().toEpochMilli(); - long took = (end - start); - topicCounter.setDatabaseLatency(took); - if (took == 0) { - took = 1; - } - long rps = batch.size() * 1000L / took; - topicCounter.setRecordsPerSecond(rps); - long bps = batchBytes * 1000 / took; - topicCounter.setBytesPerSecond(bps); - durationStatistics.addAndGetRecords(batch.size()); - durationStatistics.addAndGetBytes(batchBytes); - topicCounter.addToTotalBytes(batchBytes); - topicCounter.addToTotalRecords(batch.size()); - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug( - "Sent batch for <[{}]> with records <{}> and size <{}> KB took <{}> milliseconds. <{}> RPS. <{}> KB/s ", - topic, batch.size(), batchBytes / 1024, (took), rps, bps / 1024 - ); - } - lastTimeCalled = Instant.now().toEpochMilli(); - } + void rebalance(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java new file mode 100644 index 00000000..3a836261 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -0,0 +1,173 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.consumers.kafka; + +import com.google.gson.*; +import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.metrics.topic.TopicCounter; +import com.teragrep.cfe_39.metrics.DurationStatistics; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; +import java.time.Instant; +import java.util.*; + +/* The kafka stream should first be deserialized using rlo_06 and then serialized again using avro and stored in HDFS. + The target where the record is stored in HDFS is based on the topic, partition and offset. ie. 
topic_name/0.123456 where offset is 123456 + The mock consumer is activated for testing using the configuration file: readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")*/ + +public class BatchDistributionImpl implements BatchDistribution { + + private static final Logger LOGGER = LoggerFactory.getLogger(BatchDistributionImpl.class); + + private final String topic; + private final DurationStatistics durationStatistics; + private final TopicCounter topicCounter; + private long lastTimeCalled; + private final Config config; + private final Map partitionFileMap; + + public BatchDistributionImpl( + Config config, + String topic, + DurationStatistics durationStatistics, + TopicCounter topicCounter + ) { + this.config = config; + this.topic = topic; + this.durationStatistics = durationStatistics; + this.topicCounter = topicCounter; + this.partitionFileMap = new HashMap<>(); + this.lastTimeCalled = Instant.now().toEpochMilli(); + } + + /* Input parameter is a batch of RecordOffsetObjects from kafka. Each object contains a record and its metadata (topic, partition and offset). + * Distributes the received kafka record batch to PartitionFileImpl objects based on topic partition which the record originates from. + * */ + @Override + public void accept(List batch) { + long thisTime = Instant.now().toEpochMilli(); + long ftook = thisTime - lastTimeCalled; + topicCounter.setKafkaLatency(ftook); + if (LOGGER.isDebugEnabled()) { + LOGGER + .debug( + "Fuura searching your batch for <[{}]> with records <{}> and took <{}> milliseconds. <{}> EPS. ", + topic, batch.size(), (ftook), (batch.size() * 1000L / ftook) + ); + } + long batchBytes = 0L; + long start = Instant.now().toEpochMilli(); + // Starts measuring performance here. Measures how long it takes to process the whole batch. + + // Distribute the records of the batch to a PartitionFileImpl object based on partition from which the record originates from. 
+ ListIterator recordOffsetListIterator = batch.listIterator(); + while (recordOffsetListIterator.hasNext()) { + KafkaRecordImpl next = recordOffsetListIterator.next(); + JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); + // If the PartitionFileImpl corresponding to the record's partition doesn't exist, create one. + if (!partitionFileMap.containsKey(recordOffset.get("partition").getAsString())) { + try { + partitionFileMap + .put(recordOffset.get("partition").getAsString(), new PartitionFileImpl(config, recordOffset)); + } + catch (IOException e) { + LOGGER.error("Failed to create new PartitionFileImpl for record <{}>", recordOffset); + throw new RuntimeException(e); + } + } + // Every PartitionFileImpl object will hold responsibility over a single unique file that is related to a single topic partition. + PartitionFileImpl recordPartitionFile = partitionFileMap.get(recordOffset.get("partition").getAsString()); + // Tell PartitionFileImpl to add the current record to the list of records that are going to be added to the file. + recordPartitionFile.addRecord(next); + batchBytes = batchBytes + next.size(); // metrics + } + + // When all records in the current batch have been distributed to different PartitionFileImpl objects successfully, proceed to adding the records to the files for all PartitionFileImpl objects. + partitionFileMap.forEach((key, value) -> { + try { + value.commitRecords(); + } + catch (IOException e) { + LOGGER.error("Failed to write the SyslogRecords to PartitionFileImpl <{}> in topic <{}>", key, topic); + // FIXME: Fail fast and restart the whole cfe_39 so the kafka consumer group offsets can be fetched again from the files stored in HDFS. + throw new RuntimeException(e); + } + }); + + // Measures performance of code that is between start and end. 
+ long end = Instant.now().toEpochMilli(); + long took = (end - start); + topicCounter.setDatabaseLatency(took); + if (took == 0) { + took = 1; + } + long rps = batch.size() * 1000L / took; + topicCounter.setRecordsPerSecond(rps); + long bps = batchBytes * 1000 / took; + topicCounter.setBytesPerSecond(bps); + durationStatistics.addAndGetRecords(batch.size()); + durationStatistics.addAndGetBytes(batchBytes); + topicCounter.addToTotalBytes(batchBytes); + topicCounter.addToTotalRecords(batch.size()); + if (LOGGER.isDebugEnabled()) { + LOGGER + .debug( + "Sent batch for <[{}]> with records <{}> and size <{}> KB took <{}> milliseconds. <{}> RPS. <{}> KB/s ", + topic, batch.size(), batchBytes / 1024, (took), rps, bps / 1024 + ); + } + lastTimeCalled = Instant.now().toEpochMilli(); + } + + @Override + public void rebalance() { + // Handle rebalancing here. Store all remaining records of all PartitionFile objects to HDFS. + accept(new ArrayList<>()); // Will write all files with records still in them to HDFS. + // FIXME: delete all PartitionFile objects from the partitionFileMap. Must also delete the files linked to the objects. + } +} diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index dd327e29..5028b28c 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -66,7 +66,6 @@ import java.time.Duration; import java.util.*; import java.util.concurrent.CopyOnWriteArrayList; -import java.util.function.Consumer; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -212,7 +211,7 @@ private void createReader( Consumer group is also handled here, and each consumer of the group runs on separate thread.*/ int numOfThreads = Math.min(numOfConsumers, listPartitionInfo.size()); // FIXME: Alter the equation for calculating the number of threads. 
for (int threadId = 1; numOfThreads >= threadId; threadId++) { - Consumer> output = new BatchDistribution( + BatchDistributionImpl output = new BatchDistributionImpl( config, // Configuration settings topic, // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/IngestionRebalanceListener.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/IngestionRebalanceListener.java new file mode 100644 index 00000000..ccdda787 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/IngestionRebalanceListener.java @@ -0,0 +1,74 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. 
Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.consumers.kafka; + +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; +import org.apache.kafka.common.TopicPartition; + +import java.util.Collection; + +public class IngestionRebalanceListener implements ConsumerRebalanceListener { + + private final Consumer kafkaConsumer; + private final BatchDistributionImpl callbackFunction; + + public IngestionRebalanceListener(Consumer kafkaConsumer, BatchDistributionImpl callbackFunction) { + this.kafkaConsumer = kafkaConsumer; + this.callbackFunction = callbackFunction; + } + + @Override + public void onPartitionsRevoked(Collection collection) { + // Flush any records from the temporary files to HDFS to synchronize database with committed kafka offsets, and clean up PartitionFile list. + callbackFunction.rebalance(); + } + + @Override + public void onPartitionsAssigned(Collection collection) { + // NoOp: records and offsets are already stored to HDFS by the callbackFunction.rebalance(), and kafka coordinator should handle committed offsets automatically. 
+ } +} diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java index 227af98c..453b532f 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java @@ -57,14 +57,13 @@ public class KafkaReader implements AutoCloseable { private final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class); private final Consumer kafkaConsumer; - private final java.util.function.Consumer> callbackFunction; + private final BatchDistributionImpl callbackFunction; + private final IngestionRebalanceListener ingestionRebalanceListener; - public KafkaReader( - Consumer kafkaConsumer, - java.util.function.Consumer> callbackFunction - ) { + public KafkaReader(Consumer kafkaConsumer, BatchDistributionImpl callbackFunction) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; + this.ingestionRebalanceListener = new IngestionRebalanceListener(this.kafkaConsumer, this.callbackFunction); } public void read() { @@ -89,13 +88,20 @@ public void read() { } if (!recordOffsetObjectList.isEmpty()) { - /* This is the BatchDistribution.accept() function. + /* This is the BatchDistributionImpl.accept() function. KafkaRecord and other required data for HDFS storage are added to the input parameters of the accept() function which processes the consumed record.*/ callbackFunction.accept(recordOffsetObjectList); kafkaConsumer.commitSync(); + // lastTimeCalled = Instant.now().toEpochMilli(); } else { - // FIXME: If no new kafka record batches is received for a while, a consumer rebalance may have happened. To resolve use callbackFunction.accept() with empty recordOffsetObjectList. + // FIXME: If no new kafka record batches is received for a while, use callbackFunction.accept() with empty recordOffsetObjectList to flush records that have already been committed in kafka to HDFS. 
+ /*long thisTime = Instant.now().toEpochMilli(); + long ftook = thisTime - lastTimeCalled; + if (ftook > config.consumerTimeout) { + callbackFunction.accept(recordOffsetObjectList); + lastTimeCalled = Instant.now().toEpochMilli(); + }*/ } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java index 54fdce4d..94ef62aa 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java @@ -52,7 +52,6 @@ import org.slf4j.LoggerFactory; import java.util.*; -import java.util.function.Consumer; public class ReadCoordinator implements Runnable { @@ -60,14 +59,14 @@ public class ReadCoordinator implements Runnable { private final String queueTopic; private final Properties readerKafkaProperties; - private final Consumer> callbackFunction; + private final BatchDistributionImpl callbackFunction; private boolean run = true; private final Map hdfsStartOffsets; public ReadCoordinator( String queueTopic, Properties readerKafkaProperties, - Consumer> callbackFunction, + BatchDistributionImpl callbackFunction, Map hdfsStartOffsets ) { this.queueTopic = queueTopic; @@ -79,7 +78,7 @@ public ReadCoordinator( private KafkaReader createKafkaReader( Properties readerKafkaProperties, String topic, - Consumer> callbackFunction, + BatchDistributionImpl callbackFunction, boolean useMockKafkaConsumer ) { diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index fd464f85..715bbfa7 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -47,7 +47,7 @@ import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.Config; -import com.teragrep.cfe_39.consumers.kafka.BatchDistribution; +import 
com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; import com.teragrep.cfe_39.metrics.topic.TopicCounter; @@ -127,7 +127,7 @@ public void normalRecordsTest() { assertDoesNotThrow(() -> { - Consumer> output = new BatchDistribution( + BatchDistributionImpl output = new BatchDistributionImpl( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -428,7 +428,7 @@ public void skipNonRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new BatchDistribution( + Consumer> output = new BatchDistributionImpl( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -487,7 +487,7 @@ public void skipNullRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new BatchDistribution( + Consumer> output = new BatchDistributionImpl( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -545,7 +545,7 @@ public void skipNullAndNonRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new BatchDistribution( + Consumer> output = new BatchDistributionImpl( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index 366a0763..2bc03f5f 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -46,11 +46,13 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.configuration.Config; +import 
com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.ReadCoordinator; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.rlo_06.ParseException; import org.apache.kafka.common.TopicPartition; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,6 +66,7 @@ public class KafkaConsumerTest { private static final Logger LOGGER = LoggerFactory.getLogger(KafkaConsumerTest.class); + @Disabled(value = "This needs refactoring") @Test public void readCoordinatorTest2Threads() { assertDoesNotThrow(() -> { @@ -73,12 +76,12 @@ public void readCoordinatorTest2Threads() { Config config = new Config(); Map hdfsStartOffsets = new HashMap<>(); ArrayList> messages = new ArrayList<>(); - Consumer> output = message -> messages.add(message); + Consumer> output = message -> messages.add(message); // FIXME: Lambda does not work with BatchDistributionImpl interface ReadCoordinator readCoordinator = new ReadCoordinator( "testConsumerTopic", config.getKafkaConsumerProperties(), - output, + (BatchDistributionImpl) output, // FIXME: Dont/Can't use casting like this. hdfsStartOffsets ); Thread readThread = new Thread(null, readCoordinator, "testConsumerTopic1"); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. @@ -89,7 +92,7 @@ public void readCoordinatorTest2Threads() { ReadCoordinator readCoordinator2 = new ReadCoordinator( "testConsumerTopic", config.getKafkaConsumerProperties(), - output, + (BatchDistributionImpl) output, // FIXME: Dont/Can't use casting like this. hdfsStartOffsets ); Thread readThread2 = new Thread(null, readCoordinator2, "testConsumerTopic2"); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. 
@@ -488,8 +491,10 @@ public void readCoordinatorTest2Threads() { }); } + @Disabled(value = "This needs refactoring") @Test public void readCoordinatorTest1Thread() { + assertDoesNotThrow(() -> { // Set system properties to use the valid configuration. System @@ -502,7 +507,7 @@ public void readCoordinatorTest1Thread() { ReadCoordinator readCoordinator = new ReadCoordinator( "testConsumerTopic", config.getKafkaConsumerProperties(), - output, + (BatchDistributionImpl) output, // FIXME: Dont/Can't use casting like this. hdfsStartOffsets ); Thread readThread = new Thread(null, readCoordinator, "testConsumerTopic0"); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. @@ -517,7 +522,10 @@ public void readCoordinatorTest1Thread() { list.add("[WARN] 2022-04-25 07:34:50,804 com.teragrep.jla_02.Log4j Log - Log4j warn says hi!"); list.add("[ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!"); list.add("470647 [Thread-3] INFO com.teragrep.jla_02.Logback Daily - Logback-daily says hi."); - list.add("470646 [Thread-3] INFO com.teragrep.jla_02.Logback Audit - Logback-audit says hi."); + list + .add( + "470646 [Thread-3] INFO com.teragrep@Disabled(value = \"This needs refactoring\").jla_02.Logback Audit - Logback-audit says hi." 
+ ); list.add("470647 [Thread-3] INFO com.teragrep.jla_02.Logback Metric - Logback-metric says hi."); list .add( diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index fe5a08d5..e8c52297 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.configuration.Config; -import com.teragrep.cfe_39.consumers.kafka.BatchDistribution; +import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; import com.teragrep.cfe_39.metrics.topic.TopicCounter; @@ -119,7 +119,7 @@ public void failNonRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new BatchDistribution( + Consumer> output = new BatchDistributionImpl( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -162,7 +162,7 @@ public void failNullRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new BatchDistribution( + Consumer> output = new BatchDistributionImpl( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics From a63713d89659a02ee6afed2f93a230141c4810b7 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 28 Aug 2024 13:55:41 +0300 Subject: [PATCH 17/77] Finished implementing kafka consumer group rebalance handling. Added rebalance() method to PartitionFile interface. 
--- .../cfe_39/consumers/kafka/BatchDistributionImpl.java | 6 +++++- .../teragrep/cfe_39/consumers/kafka/PartitionFile.java | 2 ++ .../cfe_39/consumers/kafka/PartitionFileImpl.java | 8 ++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index 3a836261..aecf1475 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -168,6 +168,10 @@ public void accept(List batch) { public void rebalance() { // Handle rebalancing here. Store all remaining records of all PartitionFile objects to HDFS. accept(new ArrayList<>()); // Will write all files with records still in them to HDFS. - // FIXME: delete all PartitionFile objects from the partitionFileMap. Must also delete the files linked to the objects. + // Delete all PartitionFile objects from the partitionFileMap. Must also delete the files linked to the objects. 
+ partitionFileMap.forEach((key, value) -> { + value.rebalance(); + }); + partitionFileMap.clear(); } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java index d5fade83..00396f9f 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java @@ -55,4 +55,6 @@ public interface PartitionFile { void writeToHdfsEarly() throws IOException; + void rebalance(); + } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 8eeac38f..6b006059 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -79,10 +79,12 @@ public class PartitionFileImpl implements PartitionFile { this.partitionRecords = new PartitionRecordsImpl(config); } + @Override public void addRecord(KafkaRecordImpl kafkaRecord) { partitionRecords.addRecord(kafkaRecord); } + @Override public void commitRecords() throws IOException { List syslogRecordList = partitionRecords.toSyslogRecordList(); long storedOffset = 0; @@ -108,12 +110,18 @@ public void commitRecords() throws IOException { } } + @Override public void writeToHdfsEarly() throws IOException { if (!batchOffsets.isEmpty()) { writeToHdfs(batchOffsets.get(batchOffsets.size() - 1)); } } + @Override + public void rebalance() { + syslogFile.delete(); + } + // Writes the file to hdfs and initializes new file. private void writeToHdfs(long offset) throws IOException { try ( From d613c98f80f70204444176bf52749228c26c6c53 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 28 Aug 2024 15:05:22 +0300 Subject: [PATCH 18/77] Refactored HdfsTest.java and BatchDistributionTest.java. 
--- .../cfe_39/BatchDistributionTest.java | 96 +++++++++++++++---- .../java/com/teragrep/cfe_39/HdfsTest.java | 13 ++- 2 files changed, 86 insertions(+), 23 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 715bbfa7..4146f6a0 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -416,7 +416,6 @@ record = new ConsumerRecord<>( }); } - @Disabled(value = "This needs refactoring") @Test public void skipNonRFC5424DatabaseOutputTest() { // Initialize and register duration statistics @@ -435,7 +434,7 @@ public void skipNonRFC5424DatabaseOutputTest() { new TopicCounter("topicName") // TopicCounter object from metrics ); - ConsumerRecord record = new ConsumerRecord<>( + ConsumerRecord record1 = new ConsumerRecord<>( "topicName", 0, 1L, @@ -443,23 +442,56 @@ public void skipNonRFC5424DatabaseOutputTest() { "12>1 2022-04-25T07:34:50.806Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" 
.getBytes(StandardCharsets.UTF_8) ); - KafkaRecordImpl kafkaRecord = new KafkaRecordImpl( - record.topic(), - record.partition(), - record.offset(), - record.value() + KafkaRecordImpl kafkaRecord1 = new KafkaRecordImpl( + record1.topic(), + record1.partition(), + record1.offset(), + record1.value() + ); + + ConsumerRecord record2 = new ConsumerRecord<>( + "topicName", + 0, + 2L, + "2022-04-25T07:34:50.8067".getBytes(StandardCharsets.UTF_8), + "12>1 2022-04-25T07:34:50.807Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" 
+ .getBytes(StandardCharsets.UTF_8) + ); + KafkaRecordImpl kafkaRecord2 = new KafkaRecordImpl( + record2.topic(), + record2.partition(), + record2.offset(), + record2.value() + ); + + ConsumerRecord record3 = new ConsumerRecord<>( + "topicName", + 0, + 3L, + "2022-04-25T07:34:50.807Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:50.807Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" + .getBytes(StandardCharsets.UTF_8) + ); + KafkaRecordImpl kafkaRecord3 = new KafkaRecordImpl( + record3.topic(), + record3.partition(), + record3.offset(), + record3.value() ); List kafkaRecordList = new ArrayList<>(); - kafkaRecordList.add(kafkaRecord); + kafkaRecordList.add(kafkaRecord1); + kafkaRecordList.add(kafkaRecord2); + kafkaRecordList.add(kafkaRecord3); output.accept(kafkaRecordList); + output.accept(new ArrayList<>()); Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"))); - // File in hdfs does not contain any records, but acts as a marker for kafka consumer offsets. 
+ Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.3"))); + // File in hdfs does not contain any empty records. - // Assert that the file in hdfs contains the expected zero record. + // Assert that the file in hdfs contains the expected one record. - Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"); + Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.3"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. @@ -470,12 +502,19 @@ public void skipNonRFC5424DatabaseOutputTest() { SyslogRecord syslogRecord = null; LOGGER.info("\nReading records from file {}:", hdfsreadpath); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions + .assertEquals( + "{\"timestamp\": 1650872090807000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"0\", \"offset\": 3, \"origin\": \"jla-02.default\", \"payload\": \"[ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!\"}", + syslogRecord.toString() + ); + Assertions.assertFalse(reader.hasNext()); }); } - @Disabled(value = "This needs refactoring") @Test public void skipNullRFC5424DatabaseOutputTest() { // Initialize and register duration statistics @@ -508,16 +547,33 @@ public void skipNullRFC5424DatabaseOutputTest() { record.value() ); + ConsumerRecord record3 = new ConsumerRecord<>( + "topicName", + 0, + 2L, + "2022-04-25T07:34:50.807Z".getBytes(StandardCharsets.UTF_8), + "<12>1 2022-04-25T07:34:50.807Z jla-02.default jla02logger - - [origin@48577 hostname=\"jla-02.default\"][event_id@48577 hostname=\"jla-02.default\" uuid=\"c3f13f9a-05e2-41bd-b0ad-1eca6fd6fd9a\" source=\"source\" unixtime=\"1650872090806\"][event_format@48577 
original_format=\"rfc5424\"][event_node_relay@48577 hostname=\"cfe-06-0.cfe-06.default\" source=\"kafka-4.kafka.default.svc.cluster.local\" source_module=\"imrelp\"][event_version@48577 major=\"2\" minor=\"2\" hostname=\"cfe-06-0.cfe-06.default\" version_source=\"relay\"][event_node_router@48577 source=\"cfe-06-0.cfe-06.default.svc.cluster.local\" source_module=\"imrelp\" hostname=\"cfe-07-0.cfe-07.default\"][teragrep@48577 streamname=\"test:jla02logger:0\" directory=\"jla02logger\" unixtime=\"1650872090\"] [ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!" + .getBytes(StandardCharsets.UTF_8) + ); + KafkaRecordImpl kafkaRecord3 = new KafkaRecordImpl( + record3.topic(), + record3.partition(), + record3.offset(), + record3.value() + ); + List kafkaRecordList = new ArrayList<>(); kafkaRecordList.add(kafkaRecord); + kafkaRecordList.add(kafkaRecord3); output.accept(kafkaRecordList); + output.accept(new ArrayList<>()); Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"))); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.2"))); // File in hdfs does not contain any records, but acts as a marker for kafka consumer offsets. // Assert that the file in hdfs contains the expected zero record. - Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"); + Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.2"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. 
@@ -528,12 +584,19 @@ public void skipNullRFC5424DatabaseOutputTest() { SyslogRecord syslogRecord = null; LOGGER.info("\nReading records from file {}:", hdfsreadpath); + Assertions.assertTrue(reader.hasNext()); + syslogRecord = reader.next(syslogRecord); + Assertions + .assertEquals( + "{\"timestamp\": 1650872090807000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"0\", \"offset\": 2, \"origin\": \"jla-02.default\", \"payload\": \"[ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!\"}", + syslogRecord.toString() + ); + Assertions.assertFalse(reader.hasNext()); }); } - @Disabled(value = "This needs refactoring") @Test public void skipNullAndNonRFC5424DatabaseOutputTest() { // Initialize and register duration statistics @@ -590,6 +653,7 @@ record = new ConsumerRecord<>( kafkaRecord = new KafkaRecordImpl(record.topic(), record.partition(), record.offset(), record.value()); kafkaRecordList.add(kafkaRecord); output.accept(kafkaRecordList); + output.accept(new ArrayList<>()); Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.3"))); diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index c67626e8..ffc9227f 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -99,14 +99,12 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } - @Disabled(value = "This needs refactoring") @Test public void hdfsWriteTest() { // This test case is for testing the functionality of the HDFSWrite.java by writing pre-generated AVRO-files to the HDFS database and asserting the results are correct. 
assertDoesNotThrow(() -> { Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); - // writer.commit will delete the file that is given as an input argument. Copy the mock files to another directory so the deletion can be asserted properly too. String pathname = System.getProperty("user.dir") + "/src/test/resources/mockHdfsFiles/0.9"; java.nio.file.Path sourceFile = Paths.get(pathname); java.nio.file.Path targetDir = Paths.get(config.getQueueDirectory()); @@ -119,8 +117,9 @@ public void hdfsWriteTest() { .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":9}") .getAsJsonObject(); try (HDFSWrite writer = new HDFSWrite(config, "testConsumerTopic", "0", 9)) { - writer.commit(avroFile); // commits avroFile to HDFS and deletes avroFile afterward. + writer.commit(avroFile); // commits avroFile to HDFS. } + targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); @@ -139,6 +138,7 @@ public void hdfsWriteTest() { try (HDFSWrite writer = new HDFSWrite(config, "testConsumerTopic", "0", 13)) { writer.commit(avroFile); // commits avroFile to HDFS and deletes avroFile afterward. } + targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); @@ -147,29 +147,27 @@ public void hdfsWriteTest() { }); } - @Disabled(value = "This needs refactoring") @Test public void hdfsWriteExceptionTest() { // This test case is for testing the functionality of the HDFSWrite.java exception handling by trying to write the same file twice and asserting that the proper exception is thrown. 
assertDoesNotThrow(() -> { Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); - // writer.commit will delete the source file that is given as an input argument. Copy the mock file to another directory so the deletion of the source file can be asserted properly. String pathname = System.getProperty("user.dir") + "/src/test/resources/mockHdfsFiles/0.9"; java.nio.file.Path sourceFile = Paths.get(pathname); java.nio.file.Path targetDir = Paths.get(config.getQueueDirectory()); java.nio.file.Path targetFile = targetDir.resolve(sourceFile.getFileName()); Assertions.assertFalse(targetFile.toFile().exists()); Files.copy(sourceFile, targetFile); - Assertions.assertTrue(targetFile.toFile().exists()); File avroFile = new File(targetFile.toUri()); JsonObject recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":9}") .getAsJsonObject(); try (HDFSWrite writer = new HDFSWrite(config, "testConsumerTopic", "0", 9)) { - writer.commit(avroFile); // commits avroFile to HDFS and deletes avroFile afterward. + writer.commit(avroFile); // commits avroFile to HDFS. 
} + targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); @@ -186,6 +184,7 @@ public void hdfsWriteExceptionTest() { Exception e = Assertions.assertThrows(Exception.class, () -> writer.commit(finalAvroFile)); Assertions.assertEquals("File 0.9 already exists", e.getMessage()); writer.close(); + targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); From e4e226f6f52dd925383b1185e913b2bedc7dedd5 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 28 Aug 2024 16:04:25 +0300 Subject: [PATCH 19/77] Refactored Ingestion0FilesTest.java and divided the second test inside it to Ingestion0FilesLowSizeTest.java file. --- .../cfe_39/Ingestion0FilesLowSizeTest.java | 190 ++++++++++++++++++ .../teragrep/cfe_39/Ingestion0FilesTest.java | 49 ++--- 2 files changed, 215 insertions(+), 24 deletions(-) create mode 100644 src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java new file mode 100644 index 00000000..296e7a4d --- /dev/null +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java @@ -0,0 +1,190 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39; + +import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.io.DatumReader; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.condition.DisabledIfSystemProperty; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.Path; + +import java.io.File; +import java.net.URI; +import java.nio.file.Files; +import java.util.*; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +public class Ingestion0FilesLowSizeTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion0FilesTest.class); + private static MiniDFSCluster hdfsCluster; + private static File baseDir; + private static Config config; + private FileSystem fs; + + // Prepares known state for testing. + @BeforeEach + public void startMiniCluster() { + assertDoesNotThrow(() -> { + // Set system properties to use the valid configuration. 
+ System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + config = new Config(); + // Create a HDFS miniCluster + baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); + hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 3000); + fs = new TestFileSystemFactory().create(config.getHdfsuri()); + }); + } + + // Teardown the minicluster + @AfterEach + public void teardownMiniCluster() { + assertDoesNotThrow(() -> { + fs.close(); + }); + hdfsCluster.shutdown(); + FileUtil.fullyDelete(baseDir); + } + + @DisabledIfSystemProperty( + named = "skipIngestionTest", + matches = "true" + ) + @Test + public void ingestion0FilesLowSizeTest() { + /*This test case is for testing the functionality of the ingestion when there are files already present in the database before starting ingestion. + Maximum file size is set to 3,000 in the config. + Empty HDFS database, 140 records in mock kafka consumer ready for ingestion. All 14 records for each 10 topic partitions are stored in two avro-files per partition based on MaximumFileSize.*/ + assertDoesNotThrow(() -> { + Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. + Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); + Thread.sleep(10000); + hdfsDataIngestion.run(); + }); + + // Assert that the kafka records were ingested correctly and the database holds the correct 140 records. + + // Check that the files were properly written to HDFS. 
+ String hdfsuri = config.getHdfsuri(); + + String path = config.getHdfsPath() + "/" + "testConsumerTopic"; + // ====== Init HDFS File System Object + Configuration conf = new Configuration(); + // Set FileSystem URI + conf.set("fs.defaultFS", hdfsuri); + // Because of Maven + conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); + conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); + // Set HADOOP user + System.setProperty("HADOOP_USER_NAME", "hdfs"); + System.setProperty("hadoop.home.dir", "/"); + //Get the filesystem - HDFS + assertDoesNotThrow(() -> { + fs = FileSystem.get(URI.create(hdfsuri), conf); + + Path workingDir = fs.getWorkingDirectory(); + Path newDirectoryPath = new Path(path); + Assertions.assertTrue(fs.exists(newDirectoryPath)); + + // Assert that the kafka records were ingested correctly and the database holds the expected 20 files. + FileStatus[] fileStatuses = fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")); + + Assertions + .assertEquals(10, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.10"))); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "1.10"))); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "2.10"))); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "3.10"))); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "4.10"))); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "5.10"))); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "6.10"))); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + 
"testConsumerTopic" + "/" + "7.10"))); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "8.10"))); + Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "9.10"))); + LOGGER.debug("All expected files present in HDFS."); + + // Now Assert the files that were too small to be stored in HDFS. + + List filenameList = new ArrayList<>(); + for (int i = 0; i <= 9; i++) { + filenameList.add("testConsumerTopic" + i + "." + 1); + } + + for (String fileName : filenameList) { + + String path2 = config.getQueueDirectory() + "/" + fileName; + File avroFile = new File(path2); + + Assertions.assertTrue(filenameList.contains(avroFile.getName())); + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader = new DataFileReader<>(avroFile, datumReader); + + for (int i = 11; i <= 13; i++) { + Assertions.assertTrue(reader.hasNext()); + SyslogRecord record = reader.next(); + Assertions.assertEquals(i, record.getOffset()); + } + Assertions.assertFalse(reader.hasNext()); + reader.close(); + avroFile.delete(); + ; + } + }); + } +} diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index 54c22d67..b8c3e830 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -48,7 +48,8 @@ import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; -import org.apache.avro.file.DataFileStream; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.io.DatumReader; import org.apache.avro.specific.SpecificDatumReader; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; @@ -85,7 +86,7 @@ public void startMiniCluster() { // Create a HDFS miniCluster baseDir = 
Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 3000); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 30000); fs = new TestFileSystemFactory().create(config.getHdfsuri()); }); } @@ -100,7 +101,6 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } - @Disabled(value = "This needs refactoring") @DisabledIfSystemProperty( named = "skipIngestionTest", matches = "true" @@ -118,7 +118,7 @@ public void ingestion0FilesTest() { hdfsDataIngestion.run(); }); - // Assert that the kafka records were ingested correctly and the database holds the correct 140 records. + // Assert that the kafka records were ingested correctly and the database holds the correct 140 records (20 broken records were skipped). assertDoesNotThrow(() -> { String path = config.getHdfsPath() + "/" + "testConsumerTopic"; Path newDirectoryPath = new Path(path); @@ -130,33 +130,33 @@ public void ingestion0FilesTest() { Create the list of files to read from HDFS. Test setup is created so each of the 0-9 partitions will have 1 file with offset of 13.*/ List filenameList = new ArrayList<>(); for (int i = 0; i <= 9; i++) { - filenameList.add(i + "." + 13); + filenameList.add("testConsumerTopic" + i + "." + 1); } FileStatus[] fileStatuses = fs.listStatus(newDirectoryPath); - Assertions.assertEquals(filenameList.size(), fileStatuses.length); - for (FileStatus fileStatus : fileStatuses) { - Assertions.assertTrue(filenameList.contains(fileStatus.getPath().getName())); + Assertions.assertEquals(0, fileStatuses.length); + LOGGER.debug("No files present in HDFS as expected as maximum file size hasn't been reached."); + + // Assert that all the records are inside the temporary AVRO-files generated by PartitionFile objects during consumption. 
+ + File queueDirectory = new File(config.getQueueDirectory()); + File[] files = queueDirectory.listFiles(); + Assertions.assertEquals(10, files.length); + for (File file : files) { + Assertions.assertTrue(filenameList.contains(file.getName())); } - LOGGER.debug("All expected files present in HDFS."); int partitionCounter = 0; for (String fileName : filenameList) { - //==== Read files - LOGGER.info("Read file into hdfs"); - //Create a path - Path hdfsreadpath = new Path(newDirectoryPath + "/" + fileName); // The path should be the same that was used in writing the file to HDFS. - //Init input stream - FSDataInputStream inputStream = fs.open(hdfsreadpath); - //The data is in AVRO-format, so it can't be read as a string. - DataFileStream reader = new DataFileStream<>( - inputStream, - new SpecificDatumReader<>(SyslogRecord.class) - ); - SyslogRecord record = null; - LOGGER.info("\nReading records from file {}:", hdfsreadpath); + + String path2 = config.getQueueDirectory() + "/" + fileName; + File avroFile = new File(path2); + + Assertions.assertTrue(filenameList.contains(avroFile.getName())); + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader = new DataFileReader<>(avroFile, datumReader); Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); + SyslogRecord record = reader.next(); Assertions .assertEquals( "{\"timestamp\": 1650872090804000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" @@ -297,7 +297,8 @@ record = reader.next(record); Assertions.assertFalse(reader.hasNext()); LOGGER.info("Partition {} passed assertions.", partitionCounter); partitionCounter++; - inputStream.close(); + reader.close(); + avroFile.delete(); } Assertions.assertEquals(10, partitionCounter); }); From b1613f5db40b4cc1a83c72b6ed36ad01dd5025d9 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 
28 Aug 2024 16:37:14 +0300 Subject: [PATCH 20/77] Deleted ingestion0FilesLowSizeTest() that was already moved to a separate file. --- .../teragrep/cfe_39/Ingestion0FilesTest.java | 78 +------------------ 1 file changed, 2 insertions(+), 76 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index b8c3e830..34b77eb8 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -51,7 +51,6 @@ import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; import org.apache.avro.specific.SpecificDatumReader; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.junit.jupiter.api.*; @@ -61,7 +60,6 @@ import org.apache.hadoop.fs.Path; import java.io.File; -import java.net.URI; import java.nio.file.Files; import java.util.*; @@ -109,7 +107,7 @@ public void teardownMiniCluster() { public void ingestion0FilesTest() { /*This test case is for testing the functionality of the ingestion when there are no files already present in the database before starting ingestion. Maximum file size is set to 30,000 in the config. - Empty HDFS database, 160 records in mock kafka consumer ready for ingestion. All 16 records for each 10 topic partitions are stored in a single avro-file per partition.*/ + Empty HDFS database, 160 records in mock kafka consumer ready for ingestion. All 14 records for each 10 topic partitions are stored in a single avro-file per partition (2 skipped records per file).*/ assertDoesNotThrow(() -> { Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. 
Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); @@ -125,9 +123,7 @@ public void ingestion0FilesTest() { Assertions.assertTrue(fs.exists(newDirectoryPath)); /* This is the HDFS write path for the files: - Path hdfswritepath = new Path(newDirectoryPath + "/" + fileName); where newDirectoryPath is config.getHdfsPath() + "/" + lastObject.topic; and filename is lastObject.partition+"."+lastObject.offset; - - Create the list of files to read from HDFS. Test setup is created so each of the 0-9 partitions will have 1 file with offset of 13.*/ + Path hdfswritepath = new Path(newDirectoryPath + "/" + fileName); where newDirectoryPath is config.getHdfsPath() + "/" + lastObject.topic; and filename is lastObject.partition+"."+lastObject.offset;.*/ List filenameList = new ArrayList<>(); for (int i = 0; i <= 9; i++) { filenameList.add("testConsumerTopic" + i + "." + 1); @@ -303,74 +299,4 @@ record = reader.next(record); Assertions.assertEquals(10, partitionCounter); }); } - - @Disabled(value = "This needs refactoring") - @DisabledIfSystemProperty( - named = "skipIngestionTest", - matches = "true" - ) - @Test - public void ingestion0FilesLowSizeTest() { - /*This test case is for testing the functionality of the ingestion when there are files already present in the database before starting ingestion. - Maximum file size is set to 3,000 in the config. - Empty HDFS database, 140 records in mock kafka consumer ready for ingestion. All 14 records for each 10 topic partitions are stored in two avro-files per partition based on MaximumFileSize.*/ - assertDoesNotThrow(() -> { - Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. 
- Config config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 3000); - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); - HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); - Thread.sleep(10000); - hdfsDataIngestion.run(); - }); - - // Assert that the kafka records were ingested correctly and the database holds the correct 140 records. - - // Check that the files were properly written to HDFS. - String hdfsuri = config.getHdfsuri(); - - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; - // ====== Init HDFS File System Object - Configuration conf = new Configuration(); - // Set FileSystem URI - conf.set("fs.defaultFS", hdfsuri); - // Because of Maven - conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); - conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - // Set HADOOP user - System.setProperty("HADOOP_USER_NAME", "hdfs"); - System.setProperty("hadoop.home.dir", "/"); - //Get the filesystem - HDFS - assertDoesNotThrow(() -> { - fs = FileSystem.get(URI.create(hdfsuri), conf); - - Path workingDir = fs.getWorkingDirectory(); - Path newDirectoryPath = new Path(path); - Assertions.assertTrue(fs.exists(newDirectoryPath)); - - // Assert that the kafka records were ingested correctly and the database holds the expected 20 files. 
- Assertions - .assertEquals(20, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "1.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "1.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "2.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "2.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "3.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "3.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "4.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "4.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "5.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "5.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "6.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "6.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "7.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "7.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "8.9"))); - 
Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "8.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "9.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "9.13"))); - LOGGER.debug("All expected files present in HDFS."); - }); - } } From 9d6ee90e0785e86700d1544fc57d921299d6739e Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 29 Aug 2024 10:13:18 +0300 Subject: [PATCH 21/77] Refactored Ingestion1Old1NewFileTest.java. Fixed bug in syslogFile initialization in PartitionFileImpl.java and refactored ProcessingFailureTest.java which was affected by the fix. Cleaned comments. --- .../consumers/kafka/PartitionFileImpl.java | 3 + .../teragrep/cfe_39/Ingestion0FilesTest.java | 2 +- .../cfe_39/Ingestion1Old1NewFileTest.java | 57 ++++++++++++++----- .../cfe_39/ProcessingFailureTest.java | 30 ++++++++++ 4 files changed, 76 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 6b006059..d6f0fb2a 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -77,6 +77,9 @@ public class PartitionFileImpl implements PartitionFile { this.topicPartition = topicPartition; this.batchOffsets = new ArrayList<>(); this.partitionRecords = new PartitionRecordsImpl(config); + try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { + // Initializes the syslogFile. 
+ } } @Override diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index 34b77eb8..9302d036 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -116,7 +116,7 @@ public void ingestion0FilesTest() { hdfsDataIngestion.run(); }); - // Assert that the kafka records were ingested correctly and the database holds the correct 140 records (20 broken records were skipped). + // Assert that the kafka records were ingested correctly and the database/temporary file holds the correct 140 records (20 broken records were skipped). assertDoesNotThrow(() -> { String path = config.getHdfsPath() + "/" + "testConsumerTopic"; Path newDirectoryPath = new Path(path); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index da56983b..001baa29 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -45,8 +45,12 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.io.DatumReader; +import org.apache.avro.specific.SpecificDatumReader; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -58,6 +62,8 @@ import java.io.File; import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -129,7 +135,6 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } - @Disabled(value = "This needs refactoring") @DisabledIfSystemProperty( named = "skipIngestionTest", 
matches = "true" @@ -137,9 +142,9 @@ public void teardownMiniCluster() { @Test public void ingestion1Old1NewFileTest() { /* This test case is for testing the functionality of the ingestion when there are files already present in the database before starting ingestion. - 14 records are inserted to HDFS database before starting ingestion, with 124/140 records in mock kafka consumer ready for ingestion. - Partitions through 1 to 9 will have only a single file, partition 0 will have 2 files (0.9 and 0.13). - partition 0 files are pre-made and inserted to the HDFS database with old timestamp for file 0.9 and new for 0.13. + 14 records are inserted to HDFS database before starting ingestion, with 126/160 records in mock kafka consumer ready for ingestion (20 broken records + 14 records already in HDFS). + Partitions through 1 to 9 will have a single local file each with each containing 14 records. Partition 0 will have 3 files, 0.9 and 0.13 in HDFS and one empty local file. + partition 0 HDFS files are pre-made and inserted to the HDFS database with old timestamp for file 0.9 and new for 0.13. Old files are pruned from the database during ingestion topic scan loops.*/ assertDoesNotThrow(() -> { @@ -155,20 +160,42 @@ public void ingestion1Old1NewFileTest() { Thread.sleep(10000); hdfsDataIngestion.run(); - // Assert that the kafka records were ingested and pruned correctly and the database holds only the expected 10 files. + // Assert that the kafka records were ingested and pruned correctly and the database holds only the expected 1 file. 
Assertions - .assertEquals(10, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); + .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "1.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "2.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "3.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "4.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "5.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "6.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "7.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "8.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "9.13"))); + + // Assert the avro-files that were too small to be stored in HDFS. 
+ String path1 = config.getQueueDirectory() + "/" + "testConsumerTopic0.1"; + File avroFile1 = new File(path1); + Assertions.assertTrue(avroFile1.exists()); + DatumReader datumReader1 = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader1 = new DataFileReader<>(avroFile1, datumReader1); + Assertions.assertFalse(reader1.hasNext()); + reader1.close(); + avroFile1.delete(); + + List filenameList = new ArrayList<>(); + for (int i = 1; i <= 9; i++) { + filenameList.add("testConsumerTopic" + i + "." + 1); + } + for (String fileName : filenameList) { + String path2 = config.getQueueDirectory() + "/" + fileName; + File avroFile = new File(path2); + Assertions.assertTrue(filenameList.contains(avroFile.getName())); + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader = new DataFileReader<>(avroFile, datumReader); + for (int i = 0; i <= 13; i++) { + Assertions.assertTrue(reader.hasNext()); + SyslogRecord record = reader.next(); + Assertions.assertEquals(i, record.getOffset()); + } + Assertions.assertFalse(reader.hasNext()); + reader.close(); + avroFile.delete(); + } + }); } } diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index e8c52297..bfda7744 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -45,11 +45,15 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; import com.teragrep.cfe_39.metrics.topic.TopicCounter; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.io.DatumReader; +import org.apache.avro.specific.SpecificDatumReader; import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -147,6 +151,19 @@ public void failNonRFC5424DatabaseOutputTest() { Assertions.assertEquals("com.teragrep.rlo_06.PriorityParseException: PRIORITY < missing", e.getMessage()); Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"))); // No files stored to hdfs. + + // Assert the local avro file that should be empty. + File queueDirectory = new File(config.getQueueDirectory()); + File[] files = queueDirectory.listFiles(); + Assertions.assertEquals(1, files.length); + String path2 = config.getQueueDirectory() + "/" + "topicName0.1"; + File avroFile = new File(path2); + Assertions.assertTrue(avroFile.exists()); + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader = new DataFileReader<>(avroFile, datumReader); + Assertions.assertFalse(reader.hasNext()); + reader.close(); + avroFile.delete(); }); } @@ -194,6 +211,19 @@ public void failNullRFC5424DatabaseOutputTest() { ); Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"))); // No files stored to hdfs. + + // Assert the local avro file that should be empty. + File queueDirectory = new File(config.getQueueDirectory()); + File[] files = queueDirectory.listFiles(); + Assertions.assertEquals(1, files.length); + String path2 = config.getQueueDirectory() + "/" + "topicName0.1"; + File avroFile = new File(path2); + Assertions.assertTrue(avroFile.exists()); + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader = new DataFileReader<>(avroFile, datumReader); + Assertions.assertFalse(reader.hasNext()); + reader.close(); + avroFile.delete(); }); } From a29bc6a86c2d5f7706b715e55969b4e91ba7a187 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 29 Aug 2024 10:48:14 +0300 Subject: [PATCH 22/77] Refactored Ingestion2NewFilesTest.java.
--- .../cfe_39/Ingestion2NewFilesTest.java | 460 ++---------------- 1 file changed, 29 insertions(+), 431 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index 7de3207d..e40935cb 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -48,9 +48,9 @@ import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; -import org.apache.avro.file.DataFileStream; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.io.DatumReader; import org.apache.avro.specific.SpecificDatumReader; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.junit.jupiter.api.*; @@ -59,7 +59,6 @@ import org.slf4j.LoggerFactory; import java.io.File; -import java.net.URI; import java.nio.file.Files; import java.util.ArrayList; import java.util.List; @@ -132,7 +131,6 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } - @Disabled(value = "This needs refactoring") @DisabledIfSystemProperty( named = "skipIngestionTest", matches = "true" @@ -140,8 +138,8 @@ public void teardownMiniCluster() { @Test public void ingestion2NewFilesTest() { /* This test case is for testing the functionality of the ingestion when there are files already present in the database before starting ingestion. - 14 records are inserted to HDFS database before starting ingestion, with 124/140 records in mock kafka consumer ready for ingestion. - Partitions through 1 to 9 will have only a single file, partition 0 will have 2 files (0.9 and 0.13) that are inserted to the database before starting ingestion. 
+ 14 records are inserted to HDFS database before starting ingestion, with 126/160 records in mock kafka consumer ready for ingestion (20 broken records + 14 records already in HDFS). + Partitions through 1 to 9 will have only a single temporary avro-file that isn't stored to HDFS (size too small), partition 0 will have 2 files (0.9 and 0.13) that are inserted to the database before starting ingestion. */ assertDoesNotThrow(() -> { // Assert the known starting state. @@ -155,441 +153,41 @@ public void ingestion2NewFilesTest() { Thread.sleep(10000); hdfsDataIngestion.run(); - // Assert that the kafka records were ingested correctly and the database holds the expected 11 files holding the expected 140 records. + // Assert that the kafka records were ingested correctly and the database holds the expected 2 files. Assertions - .assertEquals(11, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); + .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "1.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "2.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "3.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "4.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "5.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "6.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + 
"testConsumerTopic" + "/" + "7.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "8.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "9.13"))); - }); - // Check that the files were properly written to HDFS. - String hdfsuri = config.getHdfsuri(); + // Assert the avro-files that were too small to be stored in HDFS. + String path1 = config.getQueueDirectory() + "/" + "testConsumerTopic0.1"; + File avroFile1 = new File(path1); + Assertions.assertTrue(avroFile1.exists()); + DatumReader datumReader1 = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader1 = new DataFileReader<>(avroFile1, datumReader1); + Assertions.assertFalse(reader1.hasNext()); // Partition 0 avro-file should be empty. + reader1.close(); + avroFile1.delete(); - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; - // ====== Init HDFS File System Object - Configuration conf = new Configuration(); - // Set FileSystem URI - conf.set("fs.defaultFS", hdfsuri); - // Because of Maven - conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); - conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - // Set HADOOP user - System.setProperty("HADOOP_USER_NAME", "hdfs"); - System.setProperty("hadoop.home.dir", "/"); - //Get the filesystem - HDFS - assertDoesNotThrow(() -> { - fs = FileSystem.get(URI.create(hdfsuri), conf); - - Path workingDir = fs.getWorkingDirectory(); - Path newDirectoryPath = new Path(path); - Assertions.assertTrue(fs.exists(newDirectoryPath)); - - /* This is the HDFS write path for the files: - Path hdfswritepath = new Path(newDirectoryPath + "/" + fileName); where newDirectoryPath is config.getHdfsPath() + "/" + lastObject.topic; and filename is lastObject.partition+"."+lastObject.offset; - - Create the list of files to read from HDFS. 
Test setup is created so each of the 1-9 partitions will have 1 file with offset of 13, while the 0th partition will have 2 files with offset 9 and 13.*/ List filenameList = new ArrayList<>(); - filenameList.add("0.9"); - filenameList.add("0.13"); - for (int i = 1; i <= 9; i++) { - filenameList.add(i + "." + 13); - } - FileStatus[] fileStatuses = fs.listStatus(newDirectoryPath); - Assertions.assertEquals(filenameList.size(), fileStatuses.length); - for (FileStatus fileStatus : fileStatuses) { - Assertions.assertTrue(filenameList.contains(fileStatus.getPath().getName())); - } - LOGGER.info("All expected files present in HDFS."); - - int partitionCounter = 0; - - // Assertions for file testConsumerTopic/0.9 - String fileName0 = filenameList.get(0); - Assertions.assertEquals("0.9", fileName0); - // Assert that file testConsumerTopic/0.9 has expected content. - LOGGER.info("Read file into hdfs"); - //Create a path - Path hdfsreadpath = new Path(newDirectoryPath + "/" + fileName0); // The path should be the same that was used in writing the file to HDFS. - //Init input stream - FSDataInputStream inputStream = fs.open(hdfsreadpath); - //The data is in AVRO-format, so it can't be read as a string. 
- DataFileStream reader = new DataFileStream<>( - inputStream, - new SpecificDatumReader<>(SyslogRecord.class) - ); - SyslogRecord record = null; - LOGGER.info("\nReading records from file {}:", hdfsreadpath); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872090804000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 0, \"origin\": \"jla-02.default\", \"payload\": \"[WARN] 2022-04-25 07:34:50,804 com.teragrep.jla_02.Log4j Log - Log4j warn says hi!\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872090806000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 1, \"origin\": \"jla-02.default\", \"payload\": \"[ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872090822000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 2, \"origin\": \"jla-02\", \"payload\": \"470647 [Thread-3] INFO com.teragrep.jla_02.Logback Daily - Logback-daily says hi.\"}", - record.toString() - ); - - 
Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872090822000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 3, \"origin\": \"jla-02\", \"payload\": \"470646 [Thread-3] INFO com.teragrep.jla_02.Logback Audit - Logback-audit says hi.\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872090822000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 4, \"origin\": \"jla-02\", \"payload\": \"470647 [Thread-3] INFO com.teragrep.jla_02.Logback Metric - Logback-metric says hi.\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872092238000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 5, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.238 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info audit says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } 
- Assertions - .assertEquals( - "{\"timestamp\": 1650872092239000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 6, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info daily says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872092239000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 7, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info metric says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); + for (int partition = 1; partition <= 9; partition++) { + filenameList.add("testConsumerTopic" + partition + "." 
+ 1); } - Assertions - .assertEquals( - "{\"timestamp\": 1650872092240000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 8, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn audit says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872092240000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 9, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn daily says hi!]\"}", - record.toString() - ); - - Assertions.assertFalse(reader.hasNext()); // Reached the end of the testConsumerTopic/0.9 file. - inputStream.close(); - filenameList.remove(0); - - // Assertions for file testConsumerTopic/0.13 - fileName0 = filenameList.get(0); - Assertions.assertEquals("0.13", fileName0); - LOGGER.info("Read file into hdfs"); - //Create a path - hdfsreadpath = new Path(newDirectoryPath + "/" + fileName0); // The path should be the same that was used in writing the file to HDFS. - //Init input stream - inputStream = fs.open(hdfsreadpath); - //The data is in AVRO-format, so it can't be read as a string. 
- reader = new DataFileStream<>(inputStream, new SpecificDatumReader<>(SyslogRecord.class)); - record = null; - LOGGER.info("\nReading records from file {}:", hdfsreadpath); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872092241000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 10, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.241 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn metric says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872092241000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 11, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.241 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error audit says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872092242000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 
12, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.242 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error daily says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug(record.toString()); - } - Assertions - .assertEquals( - "{\"timestamp\": 1650872092243000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 13, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.243 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error metric says hi!]\"}", - record.toString() - ); - Assertions.assertFalse(reader.hasNext()); // Reached the end of the testConsumerTopic/0.13 file. - inputStream.close(); - filenameList.remove(0); - - partitionCounter++; - for (String fileName : filenameList) { - //==== Read files - LOGGER.info("Read file into hdfs"); - //Create a path - hdfsreadpath = new Path(newDirectoryPath + "/" + fileName); // The path should be the same that was used in writing the file to HDFS. - //Init input stream - inputStream = fs.open(hdfsreadpath); - //The data is in AVRO-format, so it can't be read as a string. 
- reader = new DataFileStream<>(inputStream, new SpecificDatumReader<>(SyslogRecord.class)); - record = null; - LOGGER.info("\nReading records from file {}:", hdfsreadpath); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872090804000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 0, \"origin\": \"jla-02.default\", \"payload\": \"[WARN] 2022-04-25 07:34:50,804 com.teragrep.jla_02.Log4j Log - Log4j warn says hi!\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872090806000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 1, \"origin\": \"jla-02.default\", \"payload\": \"[ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872090822000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 2, \"origin\": \"jla-02\", \"payload\": \"470647 [Thread-3] INFO com.teragrep.jla_02.Logback Daily - Logback-daily says hi.\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872090822000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", 
\"partition\": \"" - + partitionCounter - + "\", \"offset\": 3, \"origin\": \"jla-02\", \"payload\": \"470646 [Thread-3] INFO com.teragrep.jla_02.Logback Audit - Logback-audit says hi.\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872090822000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 4, \"origin\": \"jla-02\", \"payload\": \"470647 [Thread-3] INFO com.teragrep.jla_02.Logback Metric - Logback-metric says hi.\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872092238000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 5, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.238 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info audit says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872092239000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 6, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info daily says hi!]\"}", - record.toString() - ); - - 
Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872092239000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 7, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info metric says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872092240000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 8, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn audit says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872092240000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 9, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn daily says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - 
"{\"timestamp\": 1650872092241000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 10, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.241 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn metric says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872092241000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 11, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.241 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error audit says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872092242000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 12, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.242 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error daily says hi!]\"}", - record.toString() - ); - - Assertions.assertTrue(reader.hasNext()); - record = reader.next(record); - Assertions - .assertEquals( - "{\"timestamp\": 1650872092243000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", 
\"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"" - + partitionCounter - + "\", \"offset\": 13, \"origin\": \"jla-02.default\", \"payload\": \"25.04.2022 07:34:52.243 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error metric says hi!]\"}", - record.toString() - ); + String path2 = config.getQueueDirectory() + "/" + fileName; + File avroFile = new File(path2); + Assertions.assertTrue(filenameList.contains(avroFile.getName())); + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader = new DataFileReader<>(avroFile, datumReader); + for (int offset = 0; offset <= 13; offset++) { + Assertions.assertTrue(reader.hasNext()); + SyslogRecord record = reader.next(); + Assertions.assertEquals(offset, record.getOffset()); + } Assertions.assertFalse(reader.hasNext()); - LOGGER.info("Partition {} passed assertions.", partitionCounter); - partitionCounter++; - inputStream.close(); + reader.close(); + avroFile.delete(); } - Assertions.assertEquals(10, partitionCounter); }); } } From 8d9627010ccc79c974fd0f7e029561a499871e61 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 29 Aug 2024 11:06:16 +0300 Subject: [PATCH 23/77] Refactored Ingestion2OldFilesTest.java --- .../cfe_39/Ingestion2OldFilesTest.java | 53 +++++++++++++------ 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index b059b9de..69888c0c 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -45,8 +45,12 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.Config; import 
com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.io.DatumReader; +import org.apache.avro.specific.SpecificDatumReader; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -58,6 +62,8 @@ import java.io.File; import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -130,7 +136,6 @@ public void teardownMiniCluster() { FileUtil.fullyDelete(baseDir); } - @Disabled(value = "This needs refactoring") @DisabledIfSystemProperty( named = "skipIngestionTest", matches = "true" @@ -155,21 +160,39 @@ public void ingestion2OldFilesTest() { Thread.sleep(10000); hdfsDataIngestion.run(); - // Assert that the kafka records were ingested and pruned correctly and the database holds only the expected 9 files. + // Assert that the kafka records were ingested and pruned correctly and the database doesn't hold any files. 
Assertions - .assertEquals(9, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions - .assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "1.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "2.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "3.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "4.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "5.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "6.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "7.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "8.13"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "9.13"))); + .assertEquals(0, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); + + // Assert the avro-files that were too small to be stored in HDFS. + String path1 = config.getQueueDirectory() + "/" + "testConsumerTopic0.1"; + File avroFile1 = new File(path1); + Assertions.assertTrue(avroFile1.exists()); + DatumReader datumReader1 = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader1 = new DataFileReader<>(avroFile1, datumReader1); + Assertions.assertFalse(reader1.hasNext()); // Partition 0 avro-file should be empty. 
+ reader1.close(); + avroFile1.delete(); + + List filenameList = new ArrayList<>(); + for (int i = 1; i <= 9; i++) { + filenameList.add("testConsumerTopic" + i + "." + 1); + } + for (String fileName : filenameList) { + String path2 = config.getQueueDirectory() + "/" + fileName; + File avroFile = new File(path2); + Assertions.assertTrue(filenameList.contains(avroFile.getName())); + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader = new DataFileReader<>(avroFile, datumReader); + for (int i = 0; i <= 13; i++) { + Assertions.assertTrue(reader.hasNext()); + SyslogRecord record = reader.next(); + Assertions.assertEquals(i, record.getOffset()); + } + Assertions.assertFalse(reader.hasNext()); + reader.close(); + avroFile.delete(); + } }); } } From b625bbbf99de8fbba518996e930395fa06026070 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 29 Aug 2024 12:00:08 +0300 Subject: [PATCH 24/77] Refactored KafkaConsumerTest.java --- .../teragrep/cfe_39/KafkaConsumerTest.java | 611 +++--------------- 1 file changed, 107 insertions(+), 504 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index 2bc03f5f..565ac878 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -45,20 +45,26 @@ */ package com.teragrep.cfe_39; +import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.ReadCoordinator; -import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; -import com.teragrep.rlo_06.ParseException; +import com.teragrep.cfe_39.metrics.DurationStatistics; +import com.teragrep.cfe_39.metrics.topic.TopicCounter; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.io.DatumReader; +import 
org.apache.avro.specific.SpecificDatumReader; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.kafka.common.TopicPartition; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; +import java.nio.file.Files; import java.util.*; -import java.util.function.Consumer; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; @@ -66,22 +72,61 @@ public class KafkaConsumerTest { private static final Logger LOGGER = LoggerFactory.getLogger(KafkaConsumerTest.class); - @Disabled(value = "This needs refactoring") - @Test - public void readCoordinatorTest2Threads() { + private static MiniDFSCluster hdfsCluster; + private static File baseDir; + private static Config config; + private FileSystem fs; + + // Prepares known state for testing. + @BeforeEach + public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. + // Set system properties to use the valid configuration with skipping of broken records disabled. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - Config config = new Config(); + config = new Config(); + // Create a HDFS miniCluster + baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); + hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 30000); + fs = new TestFileSystemFactory().create(config.getHdfsuri()); + }); + } + + // Teardown the minicluster + @AfterEach + public void teardownMiniCluster() { + assertDoesNotThrow(() -> { + fs.close(); + }); + hdfsCluster.shutdown(); + FileUtil.fullyDelete(baseDir); + } + + @Test + public void readCoordinatorTest2Threads() { + assertDoesNotThrow(() -> { Map hdfsStartOffsets = new HashMap<>(); - ArrayList> messages = new ArrayList<>(); - Consumer> output = message -> messages.add(message); // FIXME: Lambda does not work with BatchDistributionImpl interface + DurationStatistics durationStatistics = new DurationStatistics(); + durationStatistics.register(); + // BatchDistributionImpl can not be used as a functional interface. + BatchDistributionImpl output1 = new BatchDistributionImpl( + config, // Configuration settings + "topicName", // String, the name of the topic + durationStatistics, // RuntimeStatistics object from metrics + new TopicCounter("topicName") // TopicCounter object from metrics + ); + BatchDistributionImpl output2 = new BatchDistributionImpl( + config, // Configuration settings + "topicName", // String, the name of the topic + durationStatistics, // RuntimeStatistics object from metrics + new TopicCounter("topicName") // TopicCounter object from metrics + ); ReadCoordinator readCoordinator = new ReadCoordinator( "testConsumerTopic", config.getKafkaConsumerProperties(), - (BatchDistributionImpl) output, // FIXME: Dont/Can't use casting like this. 
+ output1, hdfsStartOffsets ); Thread readThread = new Thread(null, readCoordinator, "testConsumerTopic1"); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. @@ -92,531 +137,89 @@ public void readCoordinatorTest2Threads() { ReadCoordinator readCoordinator2 = new ReadCoordinator( "testConsumerTopic", config.getKafkaConsumerProperties(), - (BatchDistributionImpl) output, // FIXME: Dont/Can't use casting like this. + output2, hdfsStartOffsets ); Thread readThread2 = new Thread(null, readCoordinator2, "testConsumerTopic2"); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. readThread2.start(); // Starts the thread, in other words proceeds to call run() function of ReadCoordinator. Thread.sleep(10000); - Assertions.assertEquals(2, messages.size()); - Assertions.assertEquals(160, messages.get(0).size() + messages.get(1).size()); // Assert that expected amount of records has been consumed by the consumer group. - Assertions.assertEquals(80, messages.get(0).size()); - Assertions.assertEquals(80, messages.get(1).size()); - - // Assert that all the record contents are correct, every topic partition has identical set of offset-message pairings. 
- List messageList = new ArrayList(); - messageList.add("[WARN] 2022-04-25 07:34:50,804 com.teragrep.jla_02.Log4j Log - Log4j warn says hi!"); - messageList.add("[ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!"); - messageList.add("470647 [Thread-3] INFO com.teragrep.jla_02.Logback Daily - Logback-daily says hi."); - messageList.add("470646 [Thread-3] INFO com.teragrep.jla_02.Logback Audit - Logback-audit says hi."); - messageList.add("470647 [Thread-3] INFO com.teragrep.jla_02.Logback Metric - Logback-metric says hi."); - messageList - .add( - "25.04.2022 07:34:52.238 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info audit says hi!]" - ); - messageList - .add( - "25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info daily says hi!]" - ); - messageList - .add( - "25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info metric says hi!]" - ); - messageList - .add( - "25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn audit says hi!]" - ); - messageList - .add( - "25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn daily says hi!]" - ); - messageList - .add( - "25.04.2022 07:34:52.241 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn metric says hi!]" - ); - messageList - .add( - "25.04.2022 07:34:52.241 [ERROR] 
com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error audit says hi!]" - ); - messageList - .add( - "25.04.2022 07:34:52.242 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error daily says hi!]" - ); - messageList - .add( - "25.04.2022 07:34:52.243 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error metric says hi!]" - ); - - KafkaRecordImpl kafkaRecord; - - Iterator iterator = messageList.iterator(); - int counter = 0; - for (int i = 0; i <= 13; i++) { - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":7, \"offset\":" + i + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); - counter++; - } - - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":7, \"offset\":" + 14 + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertEquals(0, kafkaRecord.size()); - counter++; - - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":7, \"offset\":" + 15 + "}", - kafkaRecord.offsetToJSON() - ); - KafkaRecordImpl finalKafkaRecord = kafkaRecord; - ParseException e = Assertions.assertThrows(ParseException.class, finalKafkaRecord::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; - - iterator = messageList.iterator(); - for (int i = 0; i <= 13; i++) { - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":5, 
\"offset\":" + i + "}", - kafkaRecord.offsetToJSON() - ); - - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); - counter++; - } - - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":5, \"offset\":" + 14 + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertEquals(0, kafkaRecord.size()); - counter++; - - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":5, \"offset\":" + 15 + "}", - kafkaRecord.offsetToJSON() - ); - KafkaRecordImpl finalKafkaRecord1 = kafkaRecord; - e = Assertions.assertThrows(ParseException.class, finalKafkaRecord1::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; - - iterator = messageList.iterator(); - for (int i = 0; i <= 13; i++) { - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":3, \"offset\":" + i + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); - counter++; - } - - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":3, \"offset\":" + 14 + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertEquals(0, kafkaRecord.size()); - counter++; - - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":3, \"offset\":" + 15 + "}", - kafkaRecord.offsetToJSON() - ); - KafkaRecordImpl finalKafkaRecord2 = kafkaRecord; - e = Assertions.assertThrows(ParseException.class, finalKafkaRecord2::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; - - iterator = 
messageList.iterator(); - for (int i = 0; i <= 13; i++) { - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":1, \"offset\":" + i + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); - counter++; - } - - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":1, \"offset\":" + 14 + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertEquals(0, kafkaRecord.size()); - counter++; - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":1, \"offset\":" + 15 + "}", - kafkaRecord.offsetToJSON() - ); - KafkaRecordImpl finalKafkaRecord3 = kafkaRecord; - e = Assertions.assertThrows(ParseException.class, finalKafkaRecord3::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; + // Because BatchDistributionImpl can not be used as a functional interface, must do assertion through avro-files until better solution is found (add fake to interface?). - iterator = messageList.iterator(); - for (int i = 0; i <= 13; i++) { - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":9, \"offset\":" + i + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); - counter++; + // Assert the records inside the avro-files + List filenameList = new ArrayList<>(); + for (int i = 0; i <= 9; i++) { + filenameList.add("testConsumerTopic" + i + "." 
+ 1); } - - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":9, \"offset\":" + 14 + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertEquals(0, kafkaRecord.size()); - counter++; - - kafkaRecord = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":9, \"offset\":" + 15 + "}", - kafkaRecord.offsetToJSON() - ); - KafkaRecordImpl finalKafkaRecord4 = kafkaRecord; - e = Assertions.assertThrows(ParseException.class, finalKafkaRecord4::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; - - Assertions.assertEquals(80, counter); - - counter = 0; - iterator = messageList.iterator(); - for (int i = 0; i <= 13; i++) { - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":8, \"offset\":" + i + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); - counter++; - } - - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":8, \"offset\":" + 14 + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertEquals(0, kafkaRecord.size()); - counter++; - - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":8, \"offset\":" + 15 + "}", - kafkaRecord.offsetToJSON() - ); - KafkaRecordImpl finalKafkaRecord5 = kafkaRecord; - e = Assertions.assertThrows(ParseException.class, finalKafkaRecord5::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; - - iterator = messageList.iterator(); - for (int i = 0; i <= 13; i++) { - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", 
\"partition\":6, \"offset\":" + i + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); - counter++; - } - - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":6, \"offset\":" + 14 + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertEquals(0, kafkaRecord.size()); - counter++; - - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":6, \"offset\":" + 15 + "}", - kafkaRecord.offsetToJSON() - ); - - KafkaRecordImpl finalKafkaRecord6 = kafkaRecord; - e = Assertions.assertThrows(ParseException.class, finalKafkaRecord6::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; - - iterator = messageList.iterator(); - for (int i = 0; i <= 13; i++) { - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":4, \"offset\":" + i + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); - counter++; - } - - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":4, \"offset\":" + 14 + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertEquals(0, kafkaRecord.size()); - counter++; - - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":4, \"offset\":" + 15 + "}", - kafkaRecord.offsetToJSON() - ); - KafkaRecordImpl finalKafkaRecord7 = kafkaRecord; - e = Assertions.assertThrows(ParseException.class, finalKafkaRecord7::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; - - iterator = 
messageList.iterator(); - for (int i = 0; i <= 13; i++) { - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":2, \"offset\":" + i + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); - counter++; - } - - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":2, \"offset\":" + 14 + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertEquals(0, kafkaRecord.size()); - counter++; - - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":2, \"offset\":" + 15 + "}", - kafkaRecord.offsetToJSON() - ); - KafkaRecordImpl finalKafkaRecord8 = kafkaRecord; - e = Assertions.assertThrows(ParseException.class, finalKafkaRecord8::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; - - iterator = messageList.iterator(); - for (int i = 0; i <= 13; i++) { - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":" + i + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), kafkaRecord.toSyslogRecord().getPayload().toString()); - counter++; + for (String fileName : filenameList) { + String path2 = config.getQueueDirectory() + "/" + fileName; + File avroFile = new File(path2); + Assertions.assertTrue(filenameList.contains(avroFile.getName())); + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader = new DataFileReader<>(avroFile, datumReader); + for (int i = 0; i <= 13; i++) { + Assertions.assertTrue(reader.hasNext()); + SyslogRecord record = reader.next(); + Assertions.assertEquals(i, 
record.getOffset()); + } + Assertions.assertFalse(reader.hasNext()); + reader.close(); + avroFile.delete(); } - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":" + 14 + "}", - kafkaRecord.offsetToJSON() - ); - Assertions.assertEquals(0, kafkaRecord.size()); - counter++; - - kafkaRecord = messages.get(1).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":" + 15 + "}", - kafkaRecord.offsetToJSON() - ); - KafkaRecordImpl finalKafkaRecord9 = kafkaRecord; - e = Assertions.assertThrows(ParseException.class, finalKafkaRecord9::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; - - Assertions.assertEquals(80, counter); - }); } - @Disabled(value = "This needs refactoring") @Test public void readCoordinatorTest1Thread() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - Config config = new Config(); Map hdfsStartOffsets = new HashMap<>(); - ArrayList> messages = new ArrayList<>(); - Consumer> output = message -> messages.add(message); + DurationStatistics durationStatistics = new DurationStatistics(); + durationStatistics.register(); + // BatchDistributionImpl can not be used as a functional interface. + BatchDistributionImpl output = new BatchDistributionImpl( + config, // Configuration settings + "topicName", // String, the name of the topic + durationStatistics, // RuntimeStatistics object from metrics + new TopicCounter("topicName") // TopicCounter object from metrics + ); ReadCoordinator readCoordinator = new ReadCoordinator( "testConsumerTopic", config.getKafkaConsumerProperties(), - (BatchDistributionImpl) output, // FIXME: Dont/Can't use casting like this. 
+ output, hdfsStartOffsets ); Thread readThread = new Thread(null, readCoordinator, "testConsumerTopic0"); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. readThread.start(); // Starts the thread, in other words proceeds to call run() function of ReadCoordinator. Thread.sleep(10000); - Assertions.assertEquals(1, messages.size()); - Assertions.assertEquals(160, messages.get(0).size()); // Assert that expected amount of records has been consumed by the consumer. - // Assert that all the record contents are correct, every topic partition has identical set of offset-message pairings. - List list = new ArrayList(); - list.add("[WARN] 2022-04-25 07:34:50,804 com.teragrep.jla_02.Log4j Log - Log4j warn says hi!"); - list.add("[ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!"); - list.add("470647 [Thread-3] INFO com.teragrep.jla_02.Logback Daily - Logback-daily says hi."); - list - .add( - "470646 [Thread-3] INFO com.teragrep@Disabled(value = \"This needs refactoring\").jla_02.Logback Audit - Logback-audit says hi." 
- ); - list.add("470647 [Thread-3] INFO com.teragrep.jla_02.Logback Metric - Logback-metric says hi."); - list - .add( - "25.04.2022 07:34:52.238 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info audit says hi!]" - ); - list - .add( - "25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info daily says hi!]" - ); - list - .add( - "25.04.2022 07:34:52.239 [INFO] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 info metric says hi!]" - ); - list - .add( - "25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn audit says hi!]" - ); - list - .add( - "25.04.2022 07:34:52.240 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn daily says hi!]" - ); - list - .add( - "25.04.2022 07:34:52.241 [WARN] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 warn metric says hi!]" - ); - list - .add( - "25.04.2022 07:34:52.241 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error audit says hi!]" - ); - list - .add( - "25.04.2022 07:34:52.242 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error daily says hi!]" - ); - list - .add( - "25.04.2022 07:34:52.243 [ERROR] com.teragrep.jla_02.Log4j2 [instanceId=01, thread=Thread-0, userId=, 
sessionId=, requestId=, SUBJECT=, VERB=, OBJECT=, OUTCOME=, message=Log4j2 error metric says hi!]" - ); + // Because BatchDistributionImpl can not be used as a functional interface, must do assertion through avro-files until better solution is found (add fake to interface?). - KafkaRecordImpl recordOffset; - Iterator iterator; - List partitionList = new ArrayList(); - partitionList.add(7); - partitionList.add(8); - partitionList.add(5); - partitionList.add(6); - partitionList.add(3); - partitionList.add(4); - partitionList.add(1); - partitionList.add(2); - partitionList.add(0); - partitionList.add(9); - int counter = 0; - for (int partition : partitionList) { - iterator = list.iterator(); + // Assert the records inside the avro-files + List filenameList = new ArrayList<>(); + for (int i = 0; i <= 9; i++) { + filenameList.add("testConsumerTopic" + i + "." + 1); + } + for (String fileName : filenameList) { + String path2 = config.getQueueDirectory() + "/" + fileName; + File avroFile = new File(path2); + Assertions.assertTrue(filenameList.contains(avroFile.getName())); + DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); + DataFileReader reader = new DataFileReader<>(avroFile, datumReader); for (int i = 0; i <= 13; i++) { - recordOffset = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":" + partition + ", \"offset\":" + i - + "}", - recordOffset.offsetToJSON() - ); - Assertions.assertTrue(iterator.hasNext()); - Assertions.assertEquals(iterator.next(), recordOffset.toSyslogRecord().getPayload().toString()); - counter++; + Assertions.assertTrue(reader.hasNext()); + SyslogRecord record = reader.next(); + Assertions.assertEquals(i, record.getOffset()); } - - recordOffset = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":" + partition + ", \"offset\":" + 14 - + "}", - recordOffset.offsetToJSON() - ); - Assertions.assertEquals(0, 
recordOffset.size()); - counter++; - - recordOffset = messages.get(0).get(counter); - Assertions - .assertEquals( - "{\"topic\":\"testConsumerTopic\", \"partition\":" + partition + ", \"offset\":" + 15 - + "}", - recordOffset.offsetToJSON() - ); - ParseException e = Assertions.assertThrows(ParseException.class, recordOffset::toSyslogRecord); - Assertions.assertEquals("PRIORITY < missing", e.getMessage()); - counter++; + Assertions.assertFalse(reader.hasNext()); + reader.close(); + avroFile.delete(); } - Assertions.assertEquals(160, counter); // All 160 records were asserted. - }); } From fecf04cd01a68a73e35cdb2a59572c548dc967d6 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 29 Aug 2024 12:33:03 +0300 Subject: [PATCH 25/77] Added logging. --- .../cfe_39/consumers/kafka/PartitionFileImpl.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index d6f0fb2a..8aa1b148 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -80,6 +80,9 @@ public class PartitionFileImpl implements PartitionFile { try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { // Initializes the syslogFile. } + if(LOGGER.isDebugEnabled()) { + LOGGER.debug("PartitionFileImpl representing topic {} partition {} initialized successfully. syslogFile allocated to the object is located at {}", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), syslogFile.getPath()); + } } @Override @@ -105,10 +108,16 @@ public void commitRecords() throws IOException { } // Store the last offset of the batch to a list. if (storedOffset > 0) { + if(LOGGER.isDebugEnabled()) { + LOGGER.debug("Kafka Batch for topic {} partition {} processed successfully. 
Final record offset of the batch was {}.", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), storedOffset); + } batchOffsets.add(storedOffset); } // No records mean consumer group rebalance happened, write file to HDFS. if (syslogRecordList.isEmpty()) { + if(LOGGER.isDebugEnabled()) { + LOGGER.debug("Kafka Batch for topic {} partition {} was empty. Final record offset of the batch was {}. Proceeding to write the existing syslogFile to HDFS.", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), storedOffset); + } writeToHdfsEarly(); } } @@ -122,6 +131,9 @@ public void writeToHdfsEarly() throws IOException { @Override public void rebalance() { + if(LOGGER.isDebugEnabled()) { + LOGGER.debug("PartitionFileImpl-object representing topic {} partition {} was notified of consumer group rebalance. Deleting syslogFile allocated to the object at {}", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), syslogFile.getPath()); + } syslogFile.delete(); } @@ -137,6 +149,9 @@ private void writeToHdfs(long offset) throws IOException { // NoOp, syslogAvroWriter has initialized the empty AVRO-file. } batchOffsets.clear(); + if(LOGGER.isDebugEnabled()) { + LOGGER.debug("SyslogFile representing topic {} partition {} stored to HDFS with offset value of {}. SyslogFile allocated to the object is located at {}", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), offset, syslogFile.getPath()); + } } } From c9dfa7fffb945c4a68adcedd4c2bbeb798947b22 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 29 Aug 2024 12:41:25 +0300 Subject: [PATCH 26/77] Spotless. 
--- .../consumers/kafka/PartitionFileImpl.java | 40 ++++++++++++++----- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 8aa1b148..85bbbd89 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -80,8 +80,12 @@ public class PartitionFileImpl implements PartitionFile { try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { // Initializes the syslogFile. } - if(LOGGER.isDebugEnabled()) { - LOGGER.debug("PartitionFileImpl representing topic {} partition {} initialized successfully. syslogFile allocated to the object is located at {}", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), syslogFile.getPath()); + if (LOGGER.isDebugEnabled()) { + LOGGER + .debug( + "PartitionFileImpl representing topic {} partition {} initialized successfully. syslogFile allocated to the object is located at {}", + topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), syslogFile.getPath() + ); } } @@ -108,15 +112,23 @@ public void commitRecords() throws IOException { } // Store the last offset of the batch to a list. if (storedOffset > 0) { - if(LOGGER.isDebugEnabled()) { - LOGGER.debug("Kafka Batch for topic {} partition {} processed successfully. Final record offset of the batch was {}.", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), storedOffset); + if (LOGGER.isDebugEnabled()) { + LOGGER + .debug( + "Kafka Batch for topic {} partition {} processed successfully. 
Final record offset of the batch was {}.", + topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), storedOffset + ); } batchOffsets.add(storedOffset); } // No records mean consumer group rebalance happened, write file to HDFS. if (syslogRecordList.isEmpty()) { - if(LOGGER.isDebugEnabled()) { - LOGGER.debug("Kafka Batch for topic {} partition {} was empty. Final record offset of the batch was {}. Proceeding to write the existing syslogFile to HDFS.", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), storedOffset); + if (LOGGER.isDebugEnabled()) { + LOGGER + .debug( + "Kafka Batch for topic {} partition {} was empty. Final record offset of the batch was {}. Proceeding to write the existing syslogFile to HDFS.", + topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), storedOffset + ); } writeToHdfsEarly(); } @@ -131,8 +143,12 @@ public void writeToHdfsEarly() throws IOException { @Override public void rebalance() { - if(LOGGER.isDebugEnabled()) { - LOGGER.debug("PartitionFileImpl-object representing topic {} partition {} was notified of consumer group rebalance. Deleting syslogFile allocated to the object at {}", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), syslogFile.getPath()); + if (LOGGER.isDebugEnabled()) { + LOGGER + .debug( + "PartitionFileImpl-object representing topic {} partition {} was notified of consumer group rebalance. Deleting syslogFile allocated to the object at {}", + topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), syslogFile.getPath() + ); } syslogFile.delete(); } @@ -149,8 +165,12 @@ private void writeToHdfs(long offset) throws IOException { // NoOp, syslogAvroWriter has initialized the empty AVRO-file. 
} batchOffsets.clear(); - if(LOGGER.isDebugEnabled()) { - LOGGER.debug("SyslogFile representing topic {} partition {} stored to HDFS with offset value of {}. SyslogFile allocated to the object is located at {}", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), offset, syslogFile.getPath()); + if (LOGGER.isDebugEnabled()) { + LOGGER + .debug( + "SyslogFile representing topic {} partition {} stored to HDFS with offset value of {}. SyslogFile allocated to the object is located at {}", + topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), offset, syslogFile.getPath() + ); } } From 33c80e29579af74c6cd62ba694c9634e535e7010 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 30 Aug 2024 12:31:43 +0300 Subject: [PATCH 27/77] Renamed IngestionRebalanceListener.java to ConsumerRebalanceListenerImpl.java. Refactored ConsumerRebalanceListenerImpl to support topic partition offset tracking. Fixed listener registration for the consumer in ReadCoordinator.java. Refactored MockKafkaConsumerFactory to use subscribe method instead of assign. 
--- ...ava => ConsumerRebalanceListenerImpl.java} | 31 ++++++++++- .../cfe_39/consumers/kafka/KafkaReader.java | 10 +++- .../kafka/MockKafkaConsumerFactory.java | 55 ++++++++----------- .../consumers/kafka/ReadCoordinator.java | 12 +++- 4 files changed, 68 insertions(+), 40 deletions(-) rename src/main/java/com/teragrep/cfe_39/consumers/kafka/{IngestionRebalanceListener.java => ConsumerRebalanceListenerImpl.java} (65%) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/IngestionRebalanceListener.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java similarity index 65% rename from src/main/java/com/teragrep/cfe_39/consumers/kafka/IngestionRebalanceListener.java rename to src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java index ccdda787..52a205b6 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/IngestionRebalanceListener.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java @@ -47,28 +47,55 @@ import org.apache.kafka.clients.consumer.Consumer; import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; +import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.TopicPartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.Collection; +import java.util.HashMap; +import java.util.Map; -public class IngestionRebalanceListener implements ConsumerRebalanceListener { +public class ConsumerRebalanceListenerImpl implements ConsumerRebalanceListener { + private final Logger LOGGER = LoggerFactory.getLogger(ConsumerRebalanceListenerImpl.class); private final Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; + private final Map currentOffsets = new HashMap<>(); - public IngestionRebalanceListener(Consumer kafkaConsumer, BatchDistributionImpl callbackFunction) { + public ConsumerRebalanceListenerImpl( + Consumer kafkaConsumer, + 
BatchDistributionImpl callbackFunction + ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; } + public void addOffsetToTrack(String topic, int partition, long offset) { + currentOffsets.put(new TopicPartition(topic, partition), new OffsetAndMetadata(offset + 1, null)); + // 1. Add listener to be an input parameter for callbackFunction.accept() call. + // 2. Call the addOffsetToTrack() every time a file is stored to HDFS. + // 3. Finally remove the try/catch from BatchDistributionImpl and instead let the KafkaReader to try/catch the exception. + // 4. In KafkaReader commit the offsets using the listener's getCurrentOffsets() method, and then re-throw the exception. + } + + // this is used when we shut down our consumer gracefully + public Map getCurrentOffsets() { + return currentOffsets; + } + @Override public void onPartitionsRevoked(Collection collection) { // Flush any records from the temporary files to HDFS to synchronize database with committed kafka offsets, and clean up PartitionFile list. + LOGGER.info("onPartitionsRevoked triggered"); callbackFunction.rebalance(); + LOGGER.info("Committing offsets <{}>", currentOffsets); + kafkaConsumer.commitSync(currentOffsets); } @Override public void onPartitionsAssigned(Collection collection) { + LOGGER.info("onPartitionsAssigned triggered"); // NoOp: records and offsets are already stored to HDFS by the callbackFunction.rebalance(), and kafka coordinator should handle committed offsets automatically. 
} } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java index 453b532f..2bd7e2cc 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java @@ -58,12 +58,16 @@ public class KafkaReader implements AutoCloseable { private final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class); private final Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; - private final IngestionRebalanceListener ingestionRebalanceListener; + private final ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl; - public KafkaReader(Consumer kafkaConsumer, BatchDistributionImpl callbackFunction) { + public KafkaReader( + Consumer kafkaConsumer, + BatchDistributionImpl callbackFunction, + ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl + ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; - this.ingestionRebalanceListener = new IngestionRebalanceListener(this.kafkaConsumer, this.callbackFunction); + this.consumerRebalanceListenerImpl = consumerRebalanceListenerImpl; } public void read() { diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java index e5da3a81..6b4c81bb 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java @@ -265,45 +265,34 @@ public static Consumer getConsumer(int threadnum) { // generate the topic partitions and metadata first for (int i = 0; i < amountofloops; i++) { TopicPartition topicPartition = new TopicPartition("testConsumerTopic", i); - topicPartitions.add(topicPartition); - beginningOffsets.put(topicPartition, 0L); - endOffsets.put(topicPartition, 14L); - 
mockPartitionInfo.add(new PartitionInfo("testConsumerTopic", i, null, null, null)); - } - - if (threadnum == 1) { - List oddTopicPartitions = new ArrayList<>(); - for (TopicPartition a : topicPartitions) { - if (((a.partition() + 1) % 2) == 0) { - oddTopicPartitions.add(a); - } - } - consumer.assign(oddTopicPartitions); // assign - for (TopicPartition a : topicPartitions) { - if (((a.partition() + 1) % 2) == 0) { - generateEvents(consumer, a.topic(), a.partition()); + if (threadnum == 1) { + if (((i + 1) % 2) == 0) { + topicPartitions.add(topicPartition); + beginningOffsets.put(topicPartition, 0L); + endOffsets.put(topicPartition, 14L); + mockPartitionInfo.add(new PartitionInfo("testConsumerTopic", i, null, null, null)); } } - } - else if (threadnum == 2) { - List evenTopicPartitions = new ArrayList<>(); - for (TopicPartition a : topicPartitions) { - if (((a.partition() + 1) % 2) != 0) { - evenTopicPartitions.add(a); + else if (threadnum == 2) { + if (((i + 1) % 2) != 0) { + topicPartitions.add(topicPartition); + beginningOffsets.put(topicPartition, 0L); + endOffsets.put(topicPartition, 14L); + mockPartitionInfo.add(new PartitionInfo("testConsumerTopic", i, null, null, null)); } } - consumer.assign(evenTopicPartitions); // assign - for (TopicPartition a : topicPartitions) { - if (((a.partition() + 1) % 2) != 0) { - generateEvents(consumer, a.topic(), a.partition()); - } + else { + topicPartitions.add(topicPartition); + beginningOffsets.put(topicPartition, 0L); + endOffsets.put(topicPartition, 14L); + mockPartitionInfo.add(new PartitionInfo("testConsumerTopic", i, null, null, null)); } } - else { - consumer.assign(topicPartitions); // assign - for (TopicPartition a : topicPartitions) { - generateEvents(consumer, a.topic(), a.partition()); - } + + consumer.subscribe(Collections.singletonList("testConsumerTopic")); + consumer.rebalance(topicPartitions); + for (TopicPartition a : topicPartitions) { + generateEvents(consumer, a.topic(), a.partition()); } 
consumer.updateBeginningOffsets(beginningOffsets); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java index 94ef62aa..71c5feca 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java @@ -83,16 +83,23 @@ private KafkaReader createKafkaReader( ) { org.apache.kafka.clients.consumer.Consumer kafkaConsumer; + ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl; if (useMockKafkaConsumer) { // Mock kafka consumer is enabled, create mock consumers with assigned partitions that are not overlapping with each other. String name = Thread.currentThread().getName(); // Use thread name to identify which thread is running the code. if (Objects.equals(name, "testConsumerTopic1")) { kafkaConsumer = MockKafkaConsumerFactory.getConsumer(1); // creates a Kafka MockConsumer that has the odd numbered partitions assigned to it. + consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl(kafkaConsumer, callbackFunction); + kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } else if (Objects.equals(name, "testConsumerTopic2")) { kafkaConsumer = MockKafkaConsumerFactory.getConsumer(2); // creates a Kafka MockConsumer that has the even numbered partitions assigned to it. + consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl(kafkaConsumer, callbackFunction); + kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } else { kafkaConsumer = MockKafkaConsumerFactory.getConsumer(0); // Creates a single Kafka MockConsumer that has all the partitions assigned to it. 
+ consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl(kafkaConsumer, callbackFunction); + kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } } else { // Mock kafka consumer is disabled, subscribe method should handle assigning the partitions automatically to the consumer based on group id parameters of readerKafkaProperties. @@ -101,7 +108,8 @@ else if (Objects.equals(name, "testConsumerTopic2")) { new ByteArrayDeserializer(), new ByteArrayDeserializer() ); - kafkaConsumer.subscribe(Collections.singletonList(topic)); + consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl(kafkaConsumer, callbackFunction); + kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } Set assignment = kafkaConsumer.assignment(); @@ -115,7 +123,7 @@ else if (Objects.equals(name, "testConsumerTopic2")) { } } - return new KafkaReader(kafkaConsumer, callbackFunction); + return new KafkaReader(kafkaConsumer, callbackFunction, consumerRebalanceListenerImpl); } // Part or Runnable implementation, called when the thread is started. From 42164a48fdd6d5d2ea9c698f25b6ab66992a1b05 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 30 Aug 2024 12:32:53 +0300 Subject: [PATCH 28/77] Removed the pointless additional limitation from the number of threads used for consumers. --- .../com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index 5028b28c..22a21c5c 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -209,8 +209,7 @@ private void createReader( /* Every consumer is run in a separate thread. 
Consumer group is also handled here, and each consumer of the group runs on separate thread.*/ - int numOfThreads = Math.min(numOfConsumers, listPartitionInfo.size()); // FIXME: Alter the equation for calculating the number of threads. - for (int threadId = 1; numOfThreads >= threadId; threadId++) { + for (int threadId = 1; numOfConsumers >= threadId; threadId++) { BatchDistributionImpl output = new BatchDistributionImpl( config, // Configuration settings topic, // String, the name of the topic From bddd9642ff1dbd4822e8ee8b9448db3119adfd68 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 30 Aug 2024 15:56:23 +0300 Subject: [PATCH 29/77] Fixed constructor. --- .../consumers/kafka/ConsumerRebalanceListenerImpl.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java index 52a205b6..c3077378 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java @@ -61,7 +61,7 @@ public class ConsumerRebalanceListenerImpl implements ConsumerRebalanceListener private final Logger LOGGER = LoggerFactory.getLogger(ConsumerRebalanceListenerImpl.class); private final Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; - private final Map currentOffsets = new HashMap<>(); + private final Map currentOffsets; public ConsumerRebalanceListenerImpl( Consumer kafkaConsumer, @@ -69,11 +69,12 @@ public ConsumerRebalanceListenerImpl( ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; + this.currentOffsets = new HashMap<>(); } public void addOffsetToTrack(String topic, int partition, long offset) { currentOffsets.put(new TopicPartition(topic, partition), new OffsetAndMetadata(offset + 1, null)); - // 1. 
Add listener to be an input parameter for callbackFunction.accept() call. + // 1. Pass listener to callbackFunction. // 2. Call the addOffsetToTrack() every time a file is stored to HDFS. // 3. Finally remove the try/catch from BatchDistributionImpl and instead let the KafkaReader to try/catch the exception. // 4. In KafkaReader commit the offsets using the listener's getCurrentOffsets() method, and then re-throw the exception. From cb30c251e1dbb71b4498ee37614f1188e06b9a17 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 30 Aug 2024 15:57:19 +0300 Subject: [PATCH 30/77] Changed Consumer> references to subtype BatchDistributionImpl. --- src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index bfda7744..49d52217 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -72,7 +72,6 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.CopyOnWriteArrayList; -import java.util.function.Consumer; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; @@ -123,7 +122,7 @@ public void failNonRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new BatchDistributionImpl( + BatchDistributionImpl output = new BatchDistributionImpl( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics @@ -179,7 +178,7 @@ public void failNullRFC5424DatabaseOutputTest() { assertDoesNotThrow(() -> { - Consumer> output = new BatchDistributionImpl( + BatchDistributionImpl output = new BatchDistributionImpl( config, // Configuration settings "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics 
From 25244a332d13878bb867fe1d41908d863db77b72 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 30 Aug 2024 15:58:28 +0300 Subject: [PATCH 31/77] Implemented initial framework for configuration refactoring. WIP --- .../cfe_39/configuration/Configuration.java | 56 ++++++++++++ .../configuration/ConfigurationImpl.java | 91 +++++++++++++++++++ .../configuration/HdfsConfiguration.java | 49 ++++++++++ .../configuration/HdfsConfigurationImpl.java | 73 +++++++++++++++ .../configuration/KafkaConfiguration.java | 55 +++++++++++ .../configuration/KafkaConfigurationImpl.java | 83 +++++++++++++++++ .../configuration/KerberosConfiguration.java | 49 ++++++++++ .../KerberosConfigurationImpl.java | 73 +++++++++++++++ 8 files changed, 529 insertions(+) create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/Configuration.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationImpl.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationImpl.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KerberosConfiguration.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KerberosConfigurationImpl.java diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java new file mode 100644 index 00000000..de836abf --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java @@ -0,0 +1,56 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of 
the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39.configuration; + +public interface Configuration { + + String valueOf(String key); + + boolean has(String key); + + boolean equals(Object o); + +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java new file mode 100644 index 00000000..ab5f8a6e --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java @@ -0,0 +1,91 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. 
+ * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Properties; + +public class ConfigurationImpl implements Configuration { + + private final Logger LOGGER = LoggerFactory.getLogger(Config.class); + private final Properties properties; + private final KafkaConfigurationImpl kafkaConfigurationImpl; + private final HdfsConfigurationImpl hdfsConfigurationImpl; + private final KerberosConfigurationImpl kerberosConfigurationImpl; + + public ConfigurationImpl() throws IOException { + properties = new Properties(); + Path configPath = Paths + .get(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + LOGGER.info("Loading application config <[{}]>", configPath.toAbsolutePath()); + + try (InputStream inputStream = Files.newInputStream(configPath)) { + properties.load(inputStream); + LOGGER.debug("Got configuration: <{}>", properties); + } + + this.kafkaConfigurationImpl = new KafkaConfigurationImpl(properties); + this.hdfsConfigurationImpl = new HdfsConfigurationImpl(properties); + this.kerberosConfigurationImpl = new KerberosConfigurationImpl(properties); + } + + @Override + public String valueOf(String key) { + return ""; + } + + @Override + public boolean has(String key) { + 
return false; + } +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java new file mode 100644 index 00000000..26929885 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java @@ -0,0 +1,49 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. 
+ * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +public interface HdfsConfiguration { +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationImpl.java new file mode 100644 index 00000000..6249e187 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationImpl.java @@ -0,0 +1,73 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. 
+ * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +import java.util.Enumeration; +import java.util.Properties; + +public class HdfsConfigurationImpl implements HdfsConfiguration { + + private final Properties hdfsProperties; + + public HdfsConfigurationImpl(Properties properties) { + this.hdfsProperties = loadSubProperties(properties, "hdfs."); + } + + private Properties loadSubProperties(Properties properties, String prefix) { + Properties subProperties = new Properties(); + + Enumeration keys = properties.keys(); + while (keys.hasMoreElements()) { + String key = String.valueOf(keys.nextElement()); + if (key.startsWith(prefix)) { + String value = properties.getProperty(key); + String subKey = key.replaceFirst(prefix, ""); + subProperties.put(subKey, value); + } + } + return subProperties; + } +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java new file mode 100644 index 00000000..71d57546 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java @@ -0,0 +1,55 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna 
Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39.configuration; + +public interface KafkaConfiguration { + + String valueOf(String key); + + boolean has(String key); + + boolean equals(Object o); +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationImpl.java new file mode 100644 index 00000000..92af7270 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationImpl.java @@ -0,0 +1,83 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. 
+ * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +import java.util.Enumeration; +import java.util.Properties; + +public class KafkaConfigurationImpl implements KafkaConfiguration { + + Properties kafkaConsumerProperties; + + public KafkaConfigurationImpl(Properties properties) { + this.kafkaConsumerProperties = loadSubProperties(properties, "consumer."); + } + + private Properties loadSubProperties(Properties properties, String prefix) { + Properties subProperties = new Properties(); + + Enumeration keys = properties.keys(); + while (keys.hasMoreElements()) { + String key = String.valueOf(keys.nextElement()); + if (key.startsWith(prefix)) { + String value = properties.getProperty(key); + String subKey = key.replaceFirst(prefix, ""); + subProperties.put(subKey, value); + } + } + return subProperties; + } + + @Override + public String valueOf(String key) { + return ""; + } + + @Override + public boolean has(String key) { + return false; + } +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfiguration.java new file mode 100644 index 00000000..b0bda537 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfiguration.java @@ -0,0 +1,49 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * 
it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39.configuration; + +public interface KerberosConfiguration { +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfigurationImpl.java new file mode 100644 index 00000000..34d2109f --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfigurationImpl.java @@ -0,0 +1,73 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. 
+ * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +import java.util.Enumeration; +import java.util.Properties; + +public class KerberosConfigurationImpl implements KerberosConfiguration { + + private final Properties kerberosProperties; + + public KerberosConfigurationImpl(Properties properties) { + this.kerberosProperties = loadSubProperties(properties, "kerberos."); + } + + private Properties loadSubProperties(Properties properties, String prefix) { + Properties subProperties = new Properties(); + + Enumeration keys = properties.keys(); + while (keys.hasMoreElements()) { + String key = String.valueOf(keys.nextElement()); + if (key.startsWith(prefix)) { + String value = properties.getProperty(key); + String subKey = key.replaceFirst(prefix, ""); + subProperties.put(subKey, value); + } + } + return subProperties; + } +} From d47a7f1ce25da2ebff2bf5754ce99c5b1f5f5f37 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 2 Sep 2024 10:56:08 +0300 Subject: [PATCH 32/77] Implemented fixes to consumer group rebalance handling to solve issue #42. Implemented timeout handling for flushing records to HDFS in case of consuming taking too long for solving issue #45. 
--- .../teragrep/cfe_39/configuration/Config.java | 6 + .../kafka/BatchDistributionImpl.java | 1 - .../kafka/ConsumerRebalanceListenerImpl.java | 107 ++++++++++++++---- .../consumers/kafka/HdfsDataIngestion.java | 7 +- .../cfe_39/consumers/kafka/KafkaReader.java | 22 ++-- .../consumers/kafka/ReadCoordinator.java | 33 ++++-- .../teragrep/cfe_39/KafkaConsumerTest.java | 6 +- 7 files changed, 133 insertions(+), 49 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Config.java b/src/main/java/com/teragrep/cfe_39/configuration/Config.java index 445dd0ed..f8bceaef 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/Config.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/Config.java @@ -82,6 +82,7 @@ public class Config { private final boolean skipEmptyRFC5424Records; private final String dfsDataTransferProtection; private final String dfsEncryptDataTransferCipherSuites; + private final long consumerTimeout; public Config() throws IOException { this("", 0); @@ -183,6 +184,7 @@ public Config(String hdfsuri, long maximumFileSize) throws IOException { // kafka this.queueTopicPattern = properties.getProperty("queueTopicPattern", "^.*$"); this.numOfConsumers = Integer.parseInt(properties.getProperty("numOfConsumers", "1")); + this.consumerTimeout = Long.parseLong(properties.getProperty("pruneOffset", "300000")); // skip non RFC5424 records this.skipNonRFC5424Records = properties.getProperty("skipNonRFC5424Records", "false").equalsIgnoreCase("true"); @@ -307,4 +309,8 @@ public String getDfsDataTransferProtection() { public String getDfsEncryptDataTransferCipherSuites() { return dfsEncryptDataTransferCipherSuites; } + + public long consumerTimeout() { + return consumerTimeout; + } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index aecf1475..f95b0a50 100644 --- 
a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -134,7 +134,6 @@ public void accept(List batch) { } catch (IOException e) { LOGGER.error("Failed to write the SyslogRecords to PartitionFileImpl <{}> in topic <{}>", key, topic); - // FIXME: Fail fast and restart the whole cfe_39 so the kafka consumer group offsets can be fetched again from the files stored in HDFS. throw new RuntimeException(e); } }); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java index c3077378..dbfeefdc 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java @@ -45,13 +45,19 @@ */ package com.teragrep.cfe_39.consumers.kafka; +import com.teragrep.cfe_39.configuration.Config; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.kafka.clients.consumer.Consumer; import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; -import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.TopicPartition; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.net.URI; import java.util.Collection; import java.util.HashMap; import java.util.Map; @@ -59,44 +65,101 @@ public class ConsumerRebalanceListenerImpl implements ConsumerRebalanceListener { private final Logger LOGGER = LoggerFactory.getLogger(ConsumerRebalanceListenerImpl.class); + private final Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; - private final Map currentOffsets; + private final Config config; 
public ConsumerRebalanceListenerImpl( Consumer kafkaConsumer, - BatchDistributionImpl callbackFunction + BatchDistributionImpl callbackFunction, + Config config ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; - this.currentOffsets = new HashMap<>(); - } - - public void addOffsetToTrack(String topic, int partition, long offset) { - currentOffsets.put(new TopicPartition(topic, partition), new OffsetAndMetadata(offset + 1, null)); - // 1. Pass listener to callbackFunction. - // 2. Call the addOffsetToTrack() every time a file is stored to HDFS. - // 3. Finally remove the try/catch from BatchDistributionImpl and instead let the KafkaReader to try/catch the exception. - // 4. In KafkaReader commit the offsets using the listener's getCurrentOffsets() method, and then re-throw the exception. - } - - // this is used when we shut down our consumer gracefully - public Map getCurrentOffsets() { - return currentOffsets; + this.config = config; } @Override - public void onPartitionsRevoked(Collection collection) { + public void onPartitionsRevoked(Collection partitions) { // Flush any records from the temporary files to HDFS to synchronize database with committed kafka offsets, and clean up PartitionFile list. LOGGER.info("onPartitionsRevoked triggered"); callbackFunction.rebalance(); - LOGGER.info("Committing offsets <{}>", currentOffsets); - kafkaConsumer.commitSync(currentOffsets); } @Override - public void onPartitionsAssigned(Collection collection) { + public void onPartitionsAssigned(Collection partitions) { LOGGER.info("onPartitionsAssigned triggered"); - // NoOp: records and offsets are already stored to HDFS by the callbackFunction.rebalance(), and kafka coordinator should handle committed offsets automatically. + // Generates offsets of the already committed records for Kafka and passes them to the kafka consumers. 
+ FileSystem fs; + if (!"kerberos".equals(config.getHadoopAuthentication())) { + // Initializing the FileSystem with minicluster. + String hdfsuri = config.getHdfsuri(); + // ====== Init HDFS File System Object + HdfsConfiguration conf = new HdfsConfiguration(); + // Set FileSystem URI + conf.set("fs.defaultFS", hdfsuri); + // Because of Maven + conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); + conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); + // Set HADOOP user + System.setProperty("HADOOP_USER_NAME", "hdfs"); + System.setProperty("hadoop.home.dir", "/"); + //Get the filesystem - HDFS + try { + fs = FileSystem.get(URI.create(hdfsuri), conf); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + else { + // Initializing the FileSystem with kerberos. + String hdfsuri = config.getHdfsuri(); // Get from config. + // set kerberos host and realm + System.setProperty("java.security.krb5.realm", config.getKerberosRealm()); + System.setProperty("java.security.krb5.kdc", config.getKerberosHost()); + HdfsConfiguration conf = new HdfsConfiguration(); + // enable kerberus + conf.set("hadoop.security.authentication", config.getHadoopAuthentication()); + conf.set("hadoop.security.authorization", config.getHadoopAuthorization()); + conf.set("hadoop.kerberos.keytab.login.autorenewal.enabled", config.getKerberosLoginAutorenewal()); + conf.set("fs.defaultFS", hdfsuri); // Set FileSystem URI + conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName()); // Maven stuff? + conf.set("fs.file.impl", LocalFileSystem.class.getName()); // Maven stuff? 
+ /* hack for running locally with fake DNS records + set this to true if overriding the host name in /etc/hosts*/ + conf.set("dfs.client.use.datanode.hostname", config.getKerberosTestMode()); + /* server principal + the kerberos principle that the namenode is using*/ + conf.set("dfs.namenode.kerberos.principal.pattern", config.getKerberosPrincipal()); + // set sasl + conf.set("dfs.data.transfer.protection", config.getDfsDataTransferProtection()); + conf.set("dfs.encrypt.data.transfer.cipher.suites", config.getDfsEncryptDataTransferCipherSuites()); + // filesystem for HDFS access is set here + try { + fs = FileSystem.get(conf); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + Map hdfsStartOffsets = new HashMap<>(); + try (HDFSRead hr = new HDFSRead(config, fs)) { + hdfsStartOffsets = hr.hdfsStartOffsets(); + LOGGER.debug("topicPartitionStartMap generated succesfully: <{}>", hdfsStartOffsets); + } + catch (IOException e) { + throw new RuntimeException(e); + } + for (TopicPartition topicPartition : partitions) { + if (hdfsStartOffsets.containsKey(topicPartition)) { + long position = kafkaConsumer.position(topicPartition); + if (position < hdfsStartOffsets.get(topicPartition)) { + kafkaConsumer.seek(topicPartition, hdfsStartOffsets.get(topicPartition)); + } + } + } } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index 22a21c5c..e6b6048c 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -216,12 +216,7 @@ private void createReader( durationStatistics, // RuntimeStatistics object from metrics topicCounter // TopicCounter object from metrics ); - ReadCoordinator readCoordinator = new ReadCoordinator( - topic, - config.getKafkaConsumerProperties(), - output, - hdfsStartOffsets - ); + ReadCoordinator readCoordinator = 
new ReadCoordinator(topic, config, output, hdfsStartOffsets); Thread readThread = new Thread(null, readCoordinator, topic + threadId); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. threads.add(readThread); readThread.start(); // Starts the thread, in other words proceeds to call run() function of ReadCoordinator. diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java index 2bd7e2cc..9f647cf9 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java @@ -45,29 +45,37 @@ */ package com.teragrep.cfe_39.consumers.kafka; +import com.teragrep.cfe_39.configuration.Config; import org.apache.kafka.clients.consumer.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.time.Duration; +import java.time.Instant; import java.util.*; public class KafkaReader implements AutoCloseable { private final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class); + + private final Config config; private final Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; private final ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl; + private long lastTimeCalled; public KafkaReader( Consumer kafkaConsumer, BatchDistributionImpl callbackFunction, - ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl + ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl, + Config config ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; this.consumerRebalanceListenerImpl = consumerRebalanceListenerImpl; + this.config = config; + this.lastTimeCalled = Instant.now().toEpochMilli(); } public void read() { @@ -95,17 +103,17 @@ public void read() { /* This is the BatchDistributionImpl.accept() function. 
KafkaRecord and other required data for HDFS storage are added to the input parameters of the accept() function which processes the consumed record.*/ callbackFunction.accept(recordOffsetObjectList); - kafkaConsumer.commitSync(); - // lastTimeCalled = Instant.now().toEpochMilli(); + kafkaConsumer.commitAsync(); + lastTimeCalled = Instant.now().toEpochMilli(); } else { - // FIXME: If no new kafka record batches is received for a while, use callbackFunction.accept() with empty recordOffsetObjectList to flush records that have already been committed in kafka to HDFS. - /*long thisTime = Instant.now().toEpochMilli(); + // If no new kafka record batches is received for a while, use callbackFunction.accept() with empty recordOffsetObjectList to flush records that have already been committed in kafka to HDFS. + long thisTime = Instant.now().toEpochMilli(); long ftook = thisTime - lastTimeCalled; - if (ftook > config.consumerTimeout) { + if (ftook > config.consumerTimeout()) { callbackFunction.accept(recordOffsetObjectList); lastTimeCalled = Instant.now().toEpochMilli(); - }*/ + } } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java index 71c5feca..bae7ce67 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; +import com.teragrep.cfe_39.configuration.Config; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.serialization.ByteArrayDeserializer; @@ -58,19 +59,19 @@ public class ReadCoordinator implements Runnable { private static final Logger LOGGER = LoggerFactory.getLogger(ReadCoordinator.class); private final String queueTopic; - private final Properties readerKafkaProperties; + Config config; private final 
BatchDistributionImpl callbackFunction; private boolean run = true; private final Map hdfsStartOffsets; public ReadCoordinator( String queueTopic, - Properties readerKafkaProperties, + Config config, BatchDistributionImpl callbackFunction, Map hdfsStartOffsets ) { this.queueTopic = queueTopic; - this.readerKafkaProperties = readerKafkaProperties; + this.config = config; this.callbackFunction = callbackFunction; this.hdfsStartOffsets = hdfsStartOffsets; } @@ -88,17 +89,29 @@ private KafkaReader createKafkaReader( String name = Thread.currentThread().getName(); // Use thread name to identify which thread is running the code. if (Objects.equals(name, "testConsumerTopic1")) { kafkaConsumer = MockKafkaConsumerFactory.getConsumer(1); // creates a Kafka MockConsumer that has the odd numbered partitions assigned to it. - consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl(kafkaConsumer, callbackFunction); + consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( + kafkaConsumer, + callbackFunction, + config + ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } else if (Objects.equals(name, "testConsumerTopic2")) { kafkaConsumer = MockKafkaConsumerFactory.getConsumer(2); // creates a Kafka MockConsumer that has the even numbered partitions assigned to it. - consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl(kafkaConsumer, callbackFunction); + consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( + kafkaConsumer, + callbackFunction, + config + ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } else { kafkaConsumer = MockKafkaConsumerFactory.getConsumer(0); // Creates a single Kafka MockConsumer that has all the partitions assigned to it. 
- consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl(kafkaConsumer, callbackFunction); + consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( + kafkaConsumer, + callbackFunction, + config + ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } } @@ -108,7 +121,7 @@ else if (Objects.equals(name, "testConsumerTopic2")) { new ByteArrayDeserializer(), new ByteArrayDeserializer() ); - consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl(kafkaConsumer, callbackFunction); + consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl(kafkaConsumer, callbackFunction, config); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } @@ -123,17 +136,17 @@ else if (Objects.equals(name, "testConsumerTopic2")) { } } - return new KafkaReader(kafkaConsumer, callbackFunction, consumerRebalanceListenerImpl); + return new KafkaReader(kafkaConsumer, callbackFunction, consumerRebalanceListenerImpl, config); } // Part or Runnable implementation, called when the thread is started. 
@Override public void run() { boolean useMockKafkaConsumer = Boolean - .parseBoolean(readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")); + .parseBoolean(config.getKafkaConsumerProperties().getProperty("useMockKafkaConsumer", "false")); try ( KafkaReader kafkaReader = createKafkaReader( - readerKafkaProperties, queueTopic, callbackFunction, useMockKafkaConsumer + config.getKafkaConsumerProperties(), queueTopic, callbackFunction, useMockKafkaConsumer ) ) { while (run) { diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index 565ac878..d4faa5f5 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -125,7 +125,7 @@ public void readCoordinatorTest2Threads() { ReadCoordinator readCoordinator = new ReadCoordinator( "testConsumerTopic", - config.getKafkaConsumerProperties(), + config, output1, hdfsStartOffsets ); @@ -136,7 +136,7 @@ public void readCoordinatorTest2Threads() { ReadCoordinator readCoordinator2 = new ReadCoordinator( "testConsumerTopic", - config.getKafkaConsumerProperties(), + config, output2, hdfsStartOffsets ); @@ -188,7 +188,7 @@ public void readCoordinatorTest1Thread() { ReadCoordinator readCoordinator = new ReadCoordinator( "testConsumerTopic", - config.getKafkaConsumerProperties(), + config, output, hdfsStartOffsets ); From 9e4f7607bb1005e618dbc7ce7d6c9392748ebcae Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 2 Sep 2024 12:06:47 +0300 Subject: [PATCH 33/77] Renamed KafkaRecordConverter to KafkaAsSyslogRecord. 
--- .../{KafkaRecordConverter.java => KafkaAsSyslogRecord.java} | 4 ++-- .../com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) rename src/main/java/com/teragrep/cfe_39/consumers/kafka/{KafkaRecordConverter.java => KafkaAsSyslogRecord.java} (99%) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordConverter.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java similarity index 99% rename from src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordConverter.java rename to src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java index ce7366ac..eaac4924 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordConverter.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java @@ -56,7 +56,7 @@ import java.time.Instant; import java.time.ZonedDateTime; -public class KafkaRecordConverter { +public class KafkaAsSyslogRecord { private final SDVector eventNodeSourceSource; private final SDVector eventNodeRelaySource; @@ -75,7 +75,7 @@ public class KafkaRecordConverter { private final ByteBuffer sourceConcatenationBuffer; - public KafkaRecordConverter() { + public KafkaAsSyslogRecord() { this.eventNodeSourceSource = new SDVector("event_node_source@48577", "source"); this.eventNodeRelaySource = new SDVector("event_node_relay@48577", "source"); this.eventNodeSourceSourceModule = new SDVector("event_node_source@48577", "source_module"); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java index 6fb36229..43d2cec5 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java @@ -46,7 +46,6 @@ package com.teragrep.cfe_39.consumers.kafka; import com.teragrep.cfe_39.avro.SyslogRecord; -import 
com.teragrep.rlo_06.*; import java.io.ByteArrayInputStream; import java.io.InputStream; @@ -85,7 +84,7 @@ public String offsetToJSON() { @Override public SyslogRecord toSyslogRecord() { InputStream inputStream = new ByteArrayInputStream(record); - return new KafkaRecordConverter().convert(inputStream, String.valueOf(partition), offset); + return new KafkaAsSyslogRecord().convert(inputStream, String.valueOf(partition), offset); } } From fc1495e22903844cff274f6dfe0e0eb4e9540d59 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 2 Sep 2024 12:23:18 +0300 Subject: [PATCH 34/77] Renamed convert() method to more appropriate toSyslogRecord(). --- .../teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java | 2 +- .../com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java index eaac4924..bb45e7b1 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java @@ -104,7 +104,7 @@ private long rfc3339ToEpoch(ZonedDateTime zonedDateTime) { return Math.addExact(sec, instant.getNano() / NANOS_PER_MICROS); } - public SyslogRecord convert(InputStream inputStream, String partition, long offset) { + public SyslogRecord toSyslogRecord(InputStream inputStream, String partition, long offset) { rfc5424Frame.load(inputStream); try { rfc5424Frame.next(); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java index 43d2cec5..e3870447 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java @@ -84,7 +84,7 @@ public String offsetToJSON() { @Override 
public SyslogRecord toSyslogRecord() { InputStream inputStream = new ByteArrayInputStream(record); - return new KafkaAsSyslogRecord().convert(inputStream, String.valueOf(partition), offset); + return new KafkaAsSyslogRecord().toSyslogRecord(inputStream, String.valueOf(partition), offset); } } From af1c02742558aba86bab41104463fc99c506dab0 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 2 Sep 2024 12:28:42 +0300 Subject: [PATCH 35/77] Renamed rebalance() method to more appropriate delete(). --- .../teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java | 2 +- .../java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java | 2 +- .../com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index f95b0a50..9202b099 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -169,7 +169,7 @@ public void rebalance() { accept(new ArrayList<>()); // Will write all files with records still in them to HDFS. // Delete all PartitionFile objects from the partitionFileMap. Must also delete the files linked to the objects. 
partitionFileMap.forEach((key, value) -> { - value.rebalance(); + value.delete(); }); partitionFileMap.clear(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java index 00396f9f..6c4d67f7 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java @@ -55,6 +55,6 @@ public interface PartitionFile { void writeToHdfsEarly() throws IOException; - void rebalance(); + void delete(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 85bbbd89..9bceef6b 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -142,7 +142,7 @@ public void writeToHdfsEarly() throws IOException { } @Override - public void rebalance() { + public void delete() { if (LOGGER.isDebugEnabled()) { LOGGER .debug( From 62513feb4a590277642c8265f04afc306208dbce Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 4 Sep 2024 11:10:31 +0300 Subject: [PATCH 36/77] Continuing refactoring Configuration classes. Combined kerberos/kafka/hdfs configurations back into a single class to simplify Configuration usage. 
WIP --- .../cfe_39/configuration/Configuration.java | 8 +- .../configuration/ConfigurationImpl.java | 44 ++++++---- .../configuration/HdfsConfiguration.java | 49 ----------- .../configuration/HdfsConfigurationImpl.java | 73 ---------------- .../configuration/KafkaConfigurationImpl.java | 83 ------------------- .../configuration/KerberosConfiguration.java | 49 ----------- .../KerberosConfigurationImpl.java | 73 ---------------- .../teragrep/cfe_39/ConfigurationTest.java} | 22 +++-- 8 files changed, 52 insertions(+), 349 deletions(-) delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationImpl.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationImpl.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KerberosConfiguration.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KerberosConfigurationImpl.java rename src/{main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java => test/java/com/teragrep/cfe_39/ConfigurationTest.java} (70%) diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java index de836abf..add3961f 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java @@ -45,12 +45,16 @@ */ package com.teragrep.cfe_39.configuration; +import java.io.IOException; + public interface Configuration { + ConfigurationImpl loadPropertiesFile() throws IOException; + + Configuration with(String key, String value); + String valueOf(String key); boolean has(String key); - boolean equals(Object o); - } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java index ab5f8a6e..6f19a031 100644 --- 
a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java @@ -55,37 +55,51 @@ import java.nio.file.Paths; import java.util.Properties; -public class ConfigurationImpl implements Configuration { +public final class ConfigurationImpl implements Configuration { - private final Logger LOGGER = LoggerFactory.getLogger(Config.class); + private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationImpl.class); private final Properties properties; - private final KafkaConfigurationImpl kafkaConfigurationImpl; - private final HdfsConfigurationImpl hdfsConfigurationImpl; - private final KerberosConfigurationImpl kerberosConfigurationImpl; - public ConfigurationImpl() throws IOException { - properties = new Properties(); + public ConfigurationImpl() { + this(new Properties()); + } + + public ConfigurationImpl(Properties properties) { + this.properties = properties; + } + + @Override + public ConfigurationImpl loadPropertiesFile() throws IOException { + // Maybe implement the configuration validation here instead of a public method? 
+ final Properties newProperties = new Properties(); Path configPath = Paths .get(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); LOGGER.info("Loading application config <[{}]>", configPath.toAbsolutePath()); - try (InputStream inputStream = Files.newInputStream(configPath)) { - properties.load(inputStream); - LOGGER.debug("Got configuration: <{}>", properties); + newProperties.load(inputStream); + LOGGER.debug("Got configuration: <{}>", newProperties); } + return new ConfigurationImpl(newProperties); + } - this.kafkaConfigurationImpl = new KafkaConfigurationImpl(properties); - this.hdfsConfigurationImpl = new HdfsConfigurationImpl(properties); - this.kerberosConfigurationImpl = new KerberosConfigurationImpl(properties); + @Override + public ConfigurationImpl with(String key, String value) { + // Maybe implement the configuration validation here instead of a public method? + final Properties newProperties = new Properties(properties); + newProperties.setProperty(key, value); + return new ConfigurationImpl(newProperties); } @Override public String valueOf(String key) { - return ""; + if (has(key)) { + return properties.getProperty(key); + } + throw new IllegalArgumentException("Key not found: " + key); } @Override public boolean has(String key) { - return false; + return properties.containsKey(key); } } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java deleted file mode 100644 index 26929885..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the 
License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. 
- */ -package com.teragrep.cfe_39.configuration; - -public interface HdfsConfiguration { -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationImpl.java deleted file mode 100644 index 6249e187..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationImpl.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. 
- * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.cfe_39.configuration; - -import java.util.Enumeration; -import java.util.Properties; - -public class HdfsConfigurationImpl implements HdfsConfiguration { - - private final Properties hdfsProperties; - - public HdfsConfigurationImpl(Properties properties) { - this.hdfsProperties = loadSubProperties(properties, "hdfs."); - } - - private Properties loadSubProperties(Properties properties, String prefix) { - Properties subProperties = new Properties(); - - Enumeration keys = properties.keys(); - while (keys.hasMoreElements()) { - String key = String.valueOf(keys.nextElement()); - if (key.startsWith(prefix)) { - String value = properties.getProperty(key); - String subKey = key.replaceFirst(prefix, ""); - subProperties.put(subKey, value); - } - } - return subProperties; - } -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationImpl.java deleted file mode 100644 index 92af7270..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationImpl.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. 
- */ -package com.teragrep.cfe_39.configuration; - -import java.util.Enumeration; -import java.util.Properties; - -public class KafkaConfigurationImpl implements KafkaConfiguration { - - Properties kafkaConsumerProperties; - - public KafkaConfigurationImpl(Properties properties) { - this.kafkaConsumerProperties = loadSubProperties(properties, "consumer."); - } - - private Properties loadSubProperties(Properties properties, String prefix) { - Properties subProperties = new Properties(); - - Enumeration keys = properties.keys(); - while (keys.hasMoreElements()) { - String key = String.valueOf(keys.nextElement()); - if (key.startsWith(prefix)) { - String value = properties.getProperty(key); - String subKey = key.replaceFirst(prefix, ""); - subProperties.put(subKey, value); - } - } - return subProperties; - } - - @Override - public String valueOf(String key) { - return ""; - } - - @Override - public boolean has(String key) { - return false; - } -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfiguration.java deleted file mode 100644 index b0bda537..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfiguration.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. 
- * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. 
- */ -package com.teragrep.cfe_39.configuration; - -public interface KerberosConfiguration { -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfigurationImpl.java deleted file mode 100644 index 34d2109f..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/KerberosConfigurationImpl.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. 
- * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.cfe_39.configuration; - -import java.util.Enumeration; -import java.util.Properties; - -public class KerberosConfigurationImpl implements KerberosConfiguration { - - private final Properties kerberosProperties; - - public KerberosConfigurationImpl(Properties properties) { - this.kerberosProperties = loadSubProperties(properties, "kerberos."); - } - - private Properties loadSubProperties(Properties properties, String prefix) { - Properties subProperties = new Properties(); - - Enumeration keys = properties.keys(); - while (keys.hasMoreElements()) { - String key = String.valueOf(keys.nextElement()); - if (key.startsWith(prefix)) { - String value = properties.getProperty(key); - String subKey = key.replaceFirst(prefix, ""); - subProperties.put(subKey, value); - } - } - return subProperties; - } -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java similarity index 70% rename from src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java rename to src/test/java/com/teragrep/cfe_39/ConfigurationTest.java index 71d57546..f119f7ff 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java +++ b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java @@ -43,13 +43,25 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. 
*/ -package com.teragrep.cfe_39.configuration; +package com.teragrep.cfe_39; -public interface KafkaConfiguration { +import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; - String valueOf(String key); +import java.io.IOException; - boolean has(String key); +public class ConfigurationTest { - boolean equals(Object o); + @Test + public void configurationTest() throws IOException { + System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + ConfigurationImpl configuration = new ConfigurationImpl().loadPropertiesFile(); + String s = configuration.valueOf("hdfsuri"); + Assertions.assertEquals("hdfs://localhost:45937/", s); + configuration = configuration.with("hdfsuri", "123456"); + s = configuration.valueOf("hdfsuri"); + Assertions.assertEquals("123456", s); + } } From e790cdc6b84146ebc8d3d907bfad2ad776cf3213 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 5 Sep 2024 15:37:09 +0300 Subject: [PATCH 37/77] Continuing refactoring Configuration classes. Implemented Kafka properties printing. Implemented ConfigurationValidation interface and class. 
--- .../cfe_39/configuration/Configuration.java | 3 +- .../configuration/ConfigurationImpl.java | 28 +++- .../ConfigurationValidation.java | 54 ++++++ .../ConfigurationValidationImpl.java | 158 ++++++++++++++++++ 4 files changed, 236 insertions(+), 7 deletions(-) create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java index add3961f..c6b15737 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java @@ -46,6 +46,7 @@ package com.teragrep.cfe_39.configuration; import java.io.IOException; +import java.util.Properties; public interface Configuration { @@ -55,6 +56,6 @@ public interface Configuration { String valueOf(String key); - boolean has(String key); + Properties toKafkaConsumerProperties(); } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java index 6f19a031..cd0613a5 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java @@ -59,6 +59,7 @@ public final class ConfigurationImpl implements Configuration { private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationImpl.class); private final Properties properties; + private final ConfigurationValidationImpl configurationValidationImpl; public ConfigurationImpl() { this(new Properties()); @@ -66,11 +67,11 @@ public ConfigurationImpl() { public ConfigurationImpl(Properties properties) { this.properties = properties; + configurationValidationImpl = new ConfigurationValidationImpl(); } @Override public ConfigurationImpl 
loadPropertiesFile() throws IOException { - // Maybe implement the configuration validation here instead of a public method? final Properties newProperties = new Properties(); Path configPath = Paths .get(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); @@ -79,27 +80,42 @@ public ConfigurationImpl loadPropertiesFile() throws IOException { newProperties.load(inputStream); LOGGER.debug("Got configuration: <{}>", newProperties); } + configurationValidationImpl.validate(newProperties); return new ConfigurationImpl(newProperties); } @Override public ConfigurationImpl with(String key, String value) { - // Maybe implement the configuration validation here instead of a public method? - final Properties newProperties = new Properties(properties); + final Properties newProperties = new Properties(); + newProperties.putAll(properties); newProperties.setProperty(key, value); + configurationValidationImpl.validate(newProperties); return new ConfigurationImpl(newProperties); } @Override public String valueOf(String key) { - if (has(key)) { + if (properties.containsKey(key)) { return properties.getProperty(key); } throw new IllegalArgumentException("Key not found: " + key); } @Override - public boolean has(String key) { - return properties.containsKey(key); + public Properties toKafkaConsumerProperties() { + Properties kafkaProperties = new Properties(); + kafkaProperties.put("bootstrap.servers", valueOf("bootstrap.servers")); + kafkaProperties.put("auto.offset.reset", valueOf("auto.offset.reset")); + kafkaProperties.put("enable.auto.commit", valueOf("enable.auto.commit")); + kafkaProperties.put("group.id", valueOf("group.id")); + kafkaProperties.put("security.protocol", valueOf("security.protocol")); + kafkaProperties.put("sasl.mechanism", valueOf("sasl.mechanism")); + kafkaProperties.put("max.poll.records", valueOf("max.poll.records")); + kafkaProperties.put("fetch.max.bytes", valueOf("fetch.max.bytes")); + 
kafkaProperties.put("request.timeout.ms", valueOf("request.timeout.ms")); + kafkaProperties.put("max.poll.interval.ms", valueOf("max.poll.interval.ms")); + kafkaProperties.put("useMockKafkaConsumer", valueOf("useMockKafkaConsumer")); + return kafkaProperties; } + } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java new file mode 100644 index 00000000..5463e15d --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java @@ -0,0 +1,54 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. 
Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +import java.util.Properties; + +public interface ConfigurationValidation { + + void validate(Properties properties); + +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java new file mode 100644 index 00000000..1b4dc1fb --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java @@ -0,0 +1,158 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39.configuration; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +public final class ConfigurationValidationImpl implements ConfigurationValidation { + + private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationValidationImpl.class); + private final Set requiredKeys; + + public ConfigurationValidationImpl() { + this.requiredKeys = new HashSet<>(); + } + + public void validate(Properties properties) { + validateKeys(properties); + validateValues(properties); + } + + private void validateKeys(Properties properties) { + if (requiredKeys.isEmpty()) { + loadRequiredKeys(); + } + int requiredCount = 0; + for (Map.Entry keyValuePair : properties.entrySet()) { + if (requiredKeys.contains(keyValuePair.getKey().toString())) { + requiredCount++; + } + else { + throw new IllegalStateException("Unauthorized key " + keyValuePair.getKey().toString()); + } + } + if (requiredCount < requiredKeys.size()) { + for (String key : requiredKeys) { + if (!properties.containsKey(key)) { + throw new IllegalStateException("Missing required key " + key); + } + } + } + } + + private void validateValues(Properties properties) { + // Check the requirements for the specific key-value pairs. 
+ if (Long.parseLong(properties.getProperty("pruneOffset")) <= 0) { + throw new IllegalArgumentException( + "pruneOffset must be set to >0, got " + properties.getProperty("pruneOffset") + ); + } + if (Long.parseLong(properties.getProperty("maximumFileSize")) <= 0) { + throw new IllegalArgumentException( + "maximumFileSize must be set to >0, got " + properties.getProperty("maximumFileSize") + ); + } + if (Long.parseLong(properties.getProperty("numOfConsumers")) <= 0) { + throw new IllegalArgumentException( + "numOfConsumers must be set to >0, got " + properties.getProperty("numOfConsumers") + ); + } + if (Long.parseLong(properties.getProperty("maximumFileSize")) <= 0) { + throw new IllegalArgumentException( + "maximumFileSize must be set to >0, got " + properties.getProperty("maximumFileSize") + ); + } + if (Long.parseLong(properties.getProperty("consumerTimeout")) <= 0) { + throw new IllegalArgumentException( + "consumerTimeout must be set to >0, got " + properties.getProperty("consumerTimeout") + ); + } + } + + private void loadRequiredKeys() { + // Common + requiredKeys.add("pruneOffset"); + requiredKeys.add("queueDirectory"); + requiredKeys.add("maximumFileSize"); + requiredKeys.add("queueTopicPattern"); + requiredKeys.add("numOfConsumers"); + requiredKeys.add("consumerTimeout"); + requiredKeys.add("skipNonRFC5424Records"); + requiredKeys.add("skipEmptyRFC5424Records"); + requiredKeys.add("log4j2.configurationFile"); + // kafka + requiredKeys.add("bootstrap.servers"); + requiredKeys.add("auto.offset.reset"); + requiredKeys.add("enable.auto.commit"); + requiredKeys.add("group.id"); + requiredKeys.add("security.protocol"); + requiredKeys.add("sasl.mechanism"); + requiredKeys.add("max.poll.records"); + requiredKeys.add("fetch.max.bytes"); + requiredKeys.add("request.timeout.ms"); + requiredKeys.add("max.poll.interval.ms"); + requiredKeys.add("useMockKafkaConsumer"); + // HDFS + requiredKeys.add("hdfsPath"); + requiredKeys.add("hdfsuri"); + 
requiredKeys.add("dfs.client.use.datanode.hostname"); + requiredKeys.add("dfs.data.transfer.protection"); + requiredKeys.add("dfs.encrypt.data.transfer.cipher.suites"); + // Kerberos + requiredKeys.add("hadoop.security.authentication"); + requiredKeys.add("hadoop.security.authorization"); + requiredKeys.add("dfs.namenode.kerberos.principal.pattern"); + requiredKeys.add("java.security.krb5.kdc"); + requiredKeys.add("java.security.krb5.realm"); + requiredKeys.add("KerberosKeytabUser"); + requiredKeys.add("KerberosKeytabPath"); + requiredKeys.add("kerberosLoginAutorenewal"); + } + +} From 9519482b48e696701c4c0cf87b2003a4dd07734e Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 6 Sep 2024 14:43:44 +0300 Subject: [PATCH 38/77] Implemented configureLogging() method for configuring logging. --- .../teragrep/cfe_39/configuration/Configuration.java | 2 ++ .../cfe_39/configuration/ConfigurationImpl.java | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java index c6b15737..9d6f7727 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java @@ -58,4 +58,6 @@ public interface Configuration { Properties toKafkaConsumerProperties(); + void configureLogging() throws IOException; + } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java index cd0613a5..e0768e92 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java @@ -45,6 +45,7 @@ */ package com.teragrep.cfe_39.configuration; +import org.apache.logging.log4j.core.config.Configurator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -118,4 +119,13 @@ public 
Properties toKafkaConsumerProperties() { return kafkaProperties; } + @Override + public void configureLogging() throws IOException { + // Just for loggers to work + Path log4j2Config = Paths + .get(properties.getProperty("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties")); + LOGGER.info("Loading log4j2 config from <[{}]>", log4j2Config.toRealPath()); + Configurator.reconfigure(log4j2Config.toUri()); + } + } From 93fd7369c9ea762a540d6ff0647e9a49ba3da62d Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 6 Sep 2024 15:12:09 +0300 Subject: [PATCH 39/77] Added missing java.security.auth.login.config key to required configuration keys. --- .../cfe_39/configuration/ConfigurationValidationImpl.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java index 1b4dc1fb..f83f58c2 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java @@ -127,6 +127,7 @@ private void loadRequiredKeys() { requiredKeys.add("skipEmptyRFC5424Records"); requiredKeys.add("log4j2.configurationFile"); // kafka + requiredKeys.add("java.security.auth.login.config"); requiredKeys.add("bootstrap.servers"); requiredKeys.add("auto.offset.reset"); requiredKeys.add("enable.auto.commit"); From f43813b96a1bdcf3a62a0d0c76563dcadd2bcac2 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 6 Sep 2024 15:30:50 +0300 Subject: [PATCH 40/77] Changed all Config class usage in the code to use the new ConfigurationImpl class and its methods instead. Disabled Config tests. Updated example and test .properties files according to the refactoring changes. 
--- rpm/resources/application.properties | 32 +++--- src/main/java/com/teragrep/cfe_39/Main.java | 7 +- .../kafka/BatchDistributionImpl.java | 6 +- .../kafka/ConsumerRebalanceListenerImpl.java | 38 +++--- .../cfe_39/consumers/kafka/HDFSPrune.java | 8 +- .../cfe_39/consumers/kafka/HDFSRead.java | 6 +- .../cfe_39/consumers/kafka/HDFSWrite.java | 108 +++++++++--------- .../consumers/kafka/HdfsDataIngestion.java | 55 +++++---- .../cfe_39/consumers/kafka/KafkaReader.java | 8 +- .../consumers/kafka/PartitionFileImpl.java | 10 +- .../consumers/kafka/PartitionRecordsImpl.java | 10 +- .../consumers/kafka/ReadCoordinator.java | 10 +- .../cfe_39/BatchDistributionTest.java | 48 ++++---- .../java/com/teragrep/cfe_39/ConfigTest.java | 5 + .../teragrep/cfe_39/ConfigurationTest.java | 32 ++++-- .../java/com/teragrep/cfe_39/HdfsTest.java | 51 +++++---- .../cfe_39/Ingestion0FilesLowSizeTest.java | 59 ++++++---- .../teragrep/cfe_39/Ingestion0FilesTest.java | 25 ++-- .../cfe_39/Ingestion1Old1NewFileTest.java | 44 ++++--- .../cfe_39/Ingestion2NewFilesTest.java | 42 ++++--- .../cfe_39/Ingestion2OldFilesTest.java | 38 +++--- .../teragrep/cfe_39/KafkaConsumerTest.java | 19 +-- .../cfe_39/ProcessingFailureTest.java | 26 +++-- .../teragrep/cfe_39/PruningNoFilesTest.java | 22 ++-- .../cfe_39/PruningOneNewFileTest.java | 33 +++--- .../cfe_39/PruningOneOldFileTest.java | 33 +++--- .../cfe_39/PruningOneOldOneNewFileTest.java | 39 ++++--- .../cfe_39/PruningTwoNewFilesTest.java | 34 +++--- .../cfe_39/PruningTwoOldFilesTest.java | 38 +++--- .../teragrep/cfe_39/SyslogAvroWriterTest.java | 16 ++- .../cfe_39/TestMiniClusterFactory.java | 4 +- .../resources/broken.application.properties | 36 +++--- .../failProcessing.application.properties | 36 +++--- .../largeFile.application.properties | 36 +++--- .../resources/valid.application.properties | 36 +++--- 35 files changed, 628 insertions(+), 422 deletions(-) diff --git a/rpm/resources/application.properties b/rpm/resources/application.properties index 
fc5100cb..5e1ffd99 100644 --- a/rpm/resources/application.properties +++ b/rpm/resources/application.properties @@ -7,25 +7,25 @@ queueTopicPattern=^testConsumerTopic-*$ # Number of consumers created to the consumer groups numOfConsumers=2 # Kafka bootstrap servers -consumer.bootstrap.servers=test +bootstrap.servers=test # Offset, should not be touched -consumer.auto.offset.reset=earliest +auto.offset.reset=earliest # Autocommit, should not be touched -consumer.enable.auto.commit=false +enable.auto.commit=false # Consumer group id, this is to track the progress of reading hte topic -consumer.group.id=cfe_39 +group.id=cfe_39 # Used security protocol and mechanism -consumer.security.protocol=SASL_PLAINTEXT -consumer.sasl.mechanism=PLAIN +security.protocol=SASL_PLAINTEXT +sasl.mechanism=PLAIN # Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger -consumer.max.poll.records=500 +max.poll.records=500 # How much data can be fetched in one go -consumer.fetch.max.bytes=1073741820 +fetch.max.bytes=1073741820 # How long for request before timing out. Note that too big max poll records size can cause this to trigger -consumer.request.timeout.ms=300000 -consumer.max.poll.interval.ms=300000 -# For testing only, remove for prod. -consumer.useMockKafkaConsumer=true +request.timeout.ms=300000 +max.poll.interval.ms=300000 +# For testing only +useMockKafkaConsumer=false # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # The maximum file size for AVRO-files that are to be stored in HDFS database. 
@@ -38,10 +38,12 @@ skipEmptyRFC5424Records=true pruneOffset=172800000 # HDFS uri hdfsuri=hdfs://localhost:45937/ +# HDFS path +hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ # Kerberos java.security.krb5.kdc=test java.security.krb5.realm=test -hadoop.security.authentication=test +hadoop.security.authentication=kerberos hadoop.security.authorization=test dfs.namenode.kerberos.principal.pattern=test KerberosKeytabUser=test @@ -49,4 +51,6 @@ KerberosKeytabPath=test dfs.client.use.datanode.hostname=false kerberosLoginAutorenewal=true dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test \ No newline at end of file +dfs.encrypt.data.transfer.cipher.suites=test +# timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS +consumerTimeout=300000 \ No newline at end of file diff --git a/src/main/java/com/teragrep/cfe_39/Main.java b/src/main/java/com/teragrep/cfe_39/Main.java index fddd3aed..bb7dfa64 100644 --- a/src/main/java/com/teragrep/cfe_39/Main.java +++ b/src/main/java/com/teragrep/cfe_39/Main.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,9 +57,10 @@ public class Main { private static final Logger LOGGER = LoggerFactory.getLogger(Main.class); public static void main(String[] args) throws Exception { - Config config = null; + ConfigurationImpl config = new ConfigurationImpl(); try { - config = new Config(); + config = config.loadPropertiesFile(); + config.configureLogging(); } catch (IOException e) { LOGGER.error("Can't load config: ", e); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index 9202b099..4f796fc5 100644 --- 
a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39.consumers.kafka; import com.google.gson.*; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import com.teragrep.cfe_39.metrics.DurationStatistics; import org.slf4j.Logger; @@ -68,11 +68,11 @@ public class BatchDistributionImpl implements BatchDistribution { private final DurationStatistics durationStatistics; private final TopicCounter topicCounter; private long lastTimeCalled; - private final Config config; + private final ConfigurationImpl config; private final Map partitionFileMap; public BatchDistributionImpl( - Config config, + ConfigurationImpl config, String topic, DurationStatistics durationStatistics, TopicCounter topicCounter diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java index dbfeefdc..286ba329 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -68,12 +68,12 @@ public class ConsumerRebalanceListenerImpl implements ConsumerRebalanceListener private final Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; - private final Config config; + private final ConfigurationImpl config; public ConsumerRebalanceListenerImpl( Consumer 
kafkaConsumer, BatchDistributionImpl callbackFunction, - Config config + ConfigurationImpl config ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; @@ -92,9 +92,9 @@ public void onPartitionsAssigned(Collection partitions) { LOGGER.info("onPartitionsAssigned triggered"); // Generates offsets of the already committed records for Kafka and passes them to the kafka consumers. FileSystem fs; - if (!"kerberos".equals(config.getHadoopAuthentication())) { + if (!"kerberos".equals(config.valueOf("hadoop.security.authentication"))) { // Initializing the FileSystem with minicluster. - String hdfsuri = config.getHdfsuri(); + String hdfsuri = config.valueOf("hdfsuri"); // ====== Init HDFS File System Object HdfsConfiguration conf = new HdfsConfiguration(); // Set FileSystem URI @@ -115,27 +115,35 @@ public void onPartitionsAssigned(Collection partitions) { } else { // Initializing the FileSystem with kerberos. - String hdfsuri = config.getHdfsuri(); // Get from config. + String hdfsuri = config.valueOf("hdfsuri"); // Get from config. 
// set kerberos host and realm - System.setProperty("java.security.krb5.realm", config.getKerberosRealm()); - System.setProperty("java.security.krb5.kdc", config.getKerberosHost()); + System.setProperty("java.security.krb5.realm", config.valueOf("java.security.krb5.realm")); + System.setProperty("java.security.krb5.kdc", config.valueOf("java.security.krb5.kdc")); HdfsConfiguration conf = new HdfsConfiguration(); // enable kerberus - conf.set("hadoop.security.authentication", config.getHadoopAuthentication()); - conf.set("hadoop.security.authorization", config.getHadoopAuthorization()); - conf.set("hadoop.kerberos.keytab.login.autorenewal.enabled", config.getKerberosLoginAutorenewal()); + conf.set("hadoop.security.authentication", config.valueOf("hadoop.security.authentication")); + conf.set("hadoop.security.authorization", config.valueOf("hadoop.security.authorization")); + conf.set("hadoop.kerberos.keytab.login.autorenewal.enabled", config.valueOf("kerberosLoginAutorenewal")); conf.set("fs.defaultFS", hdfsuri); // Set FileSystem URI conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName()); // Maven stuff? conf.set("fs.file.impl", LocalFileSystem.class.getName()); // Maven stuff? 
/* hack for running locally with fake DNS records set this to true if overriding the host name in /etc/hosts*/ - conf.set("dfs.client.use.datanode.hostname", config.getKerberosTestMode()); + conf.set("dfs.client.use.datanode.hostname", config.valueOf("dfs.client.use.datanode.hostname")); /* server principal the kerberos principle that the namenode is using*/ - conf.set("dfs.namenode.kerberos.principal.pattern", config.getKerberosPrincipal()); + conf + .set( + "dfs.namenode.kerberos.principal.pattern", + config.valueOf("dfs.namenode.kerberos.principal.pattern") + ); // set sasl - conf.set("dfs.data.transfer.protection", config.getDfsDataTransferProtection()); - conf.set("dfs.encrypt.data.transfer.cipher.suites", config.getDfsEncryptDataTransferCipherSuites()); + conf.set("dfs.data.transfer.protection", config.valueOf("dfs.data.transfer.protection")); + conf + .set( + "dfs.encrypt.data.transfer.cipher.suites", + config.valueOf("dfs.encrypt.data.transfer.cipher.suites") + ); // filesystem for HDFS access is set here try { fs = FileSystem.get(conf); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java index 59702a10..7f5a8913 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -61,9 +61,9 @@ public class HDFSPrune { private final Path newDirectoryPath; private final long cutOffEpoch; - public HDFSPrune(Config config, String topicName, FileSystem fs) throws IOException { + public HDFSPrune(ConfigurationImpl config, String topicName, FileSystem fs) throws IOException { this.fs = fs; - String path 
= config.getHdfsPath().concat("/").concat(topicName); + String path = config.valueOf("hdfsPath").concat("/").concat(topicName); //==== Create directory if not exists Path workingDir = fs.getWorkingDirectory(); newDirectoryPath = new Path(path); @@ -72,7 +72,7 @@ public HDFSPrune(Config config, String topicName, FileSystem fs) throws IOExcept fs.mkdirs(newDirectoryPath); LOGGER.info("Path <{}> created.", path); } - long pruneOffset = config.getPruneOffset(); + long pruneOffset = Long.parseLong(config.valueOf("pruneOffset")); cutOffEpoch = System.currentTimeMillis() - pruneOffset; // pruneOffset is parametrized in Config.java. Default value is 2 days in milliseconds. } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java index bb6d4758..43696b8b 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.apache.hadoop.fs.*; import org.apache.kafka.common.TopicPartition; import org.slf4j.Logger; @@ -64,9 +64,9 @@ The offset map can then be used for kafka consumer seek() method, which will add private final FileSystem fs; private final String path; - public HDFSRead(Config config, FileSystem fs) throws IOException { + public HDFSRead(ConfigurationImpl config, FileSystem fs) throws IOException { this.fs = fs; - path = config.getHdfsPath(); + path = config.valueOf("hdfsPath"); } public Map hdfsStartOffsets() throws IOException { diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java index 584daf7d..7dbd1d7e 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java +++ 
b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.apache.hadoop.fs.*; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -67,20 +67,20 @@ public class HDFSWrite implements AutoCloseable { private final HdfsConfiguration conf; private final String hdfsuri; - public HDFSWrite(Config config, String topic, String partition, long offset) throws IOException { + public HDFSWrite(ConfigurationImpl config, String topic, String partition, long offset) throws IOException { - Properties readerKafkaProperties = config.getKafkaConsumerProperties(); + Properties readerKafkaProperties = config.toKafkaConsumerProperties(); this.useMockKafkaConsumer = Boolean .parseBoolean(readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")); if (useMockKafkaConsumer) { // Code for initializing the class for mock hdfs database usage without kerberos. - hdfsuri = config.getHdfsuri(); + hdfsuri = config.valueOf("hdfsuri"); /* The filepath should be something like hdfs:///opt/teragrep/cfe_39/srv/topic_name/0.12345 where 12345 is offset and 0 the partition. In other words the directory named topic_name holds files that are named and arranged based on partition and the partition's offset. Every partition has its own set of unique offset values. These values should be fetched from config and other input parameters (topic+partition+offset).*/ - path = config.getHdfsPath() + "/" + topic; + path = config.valueOf("hdfsPath") + "/" + topic; fileName = partition + "." + offset; // filename should be constructed from partition and offset. 
// ====== Init HDFS File System Object @@ -104,35 +104,47 @@ These values should be fetched from config and other input parameters (topic+par } else { // Code for initializing the class for kerberized HDFS database usage. - hdfsuri = config.getHdfsuri(); + hdfsuri = config.valueOf("hdfsuri"); - path = config.getHdfsPath() + "/" + topic; + path = config.valueOf("hdfsPath") + "/" + topic; fileName = partition + "." + offset; // set kerberos host and realm - System.setProperty("java.security.krb5.realm", config.getKerberosRealm()); - System.setProperty("java.security.krb5.kdc", config.getKerberosHost()); + System.setProperty("java.security.krb5.realm", config.valueOf("java.security.krb5.realm")); + System.setProperty("java.security.krb5.kdc", config.valueOf("java.security.krb5.kdc")); conf = new HdfsConfiguration(); // enable kerberus - conf.set("hadoop.security.authentication", config.getHadoopAuthentication()); - conf.set("hadoop.security.authorization", config.getHadoopAuthorization()); - conf.set("hadoop.kerberos.keytab.login.autorenewal.enabled", config.getKerberosLoginAutorenewal()); + conf.set("hadoop.security.authentication", config.valueOf("hadoop.security.authentication")); + conf.set("hadoop.security.authorization", config.valueOf("hadoop.security.authorization")); + conf + .set( + "hadoop.kerberos.keytab.login.autorenewal.enabled", + config.valueOf("hadoop.kerberos.keytab.login.autorenewal.enabled") + ); conf.set("fs.defaultFS", hdfsuri); // Set FileSystem URI conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName()); // Maven stuff? conf.set("fs.file.impl", LocalFileSystem.class.getName()); // Maven stuff? 
// hack for running locally with fake DNS records, set this to true if overriding the host name in /etc/hosts - conf.set("dfs.client.use.datanode.hostname", config.getKerberosTestMode()); + conf.set("dfs.client.use.datanode.hostname", config.valueOf("dfs.client.use.datanode.hostname")); // server principal, the kerberos principle that the namenode is using - conf.set("dfs.namenode.kerberos.principal.pattern", config.getKerberosPrincipal()); + conf + .set( + "dfs.namenode.kerberos.principal.pattern", + config.valueOf("dfs.namenode.kerberos.principal.pattern") + ); // set sasl - conf.set("dfs.data.transfer.protection", config.getDfsDataTransferProtection()); - conf.set("dfs.encrypt.data.transfer.cipher.suites", config.getDfsEncryptDataTransferCipherSuites()); + conf.set("dfs.data.transfer.protection", config.valueOf("dfs.data.transfer.protection")); + conf + .set( + "dfs.encrypt.data.transfer.cipher.suites", + config.valueOf("dfs.encrypt.data.transfer.cipher.suites") + ); // filesystem for HDFS access is set here fs = FileSystem.get(conf); @@ -140,45 +152,39 @@ These values should be fetched from config and other input parameters (topic+par } // Method for committing the AVRO-file to HDFS - public void commit(File syslogFile) { + public void commit(File syslogFile) throws IOException { // The code for writing the file to HDFS should be same for both test (non-kerberized access) and prod (kerberized access). - try { - //==== Create directory if not exists - Path workingDir = fs.getWorkingDirectory(); - // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. 
- Path newDirectoryPath = new Path(path); - if (!fs.exists(newDirectoryPath)) { - // Create new Directory - fs.mkdirs(newDirectoryPath); - LOGGER.info("Path <{}> created.", path); - } - - //==== Write file - LOGGER.debug("Begin Write file into hdfs"); - //Create a path - Path hdfswritepath = new Path(newDirectoryPath.toString() + "/" + fileName); // filename should be set according to the requirements: 0.12345 where 0 is Kafka partition and 12345 is Kafka offset. - if (fs.exists(hdfswritepath)) { - LOGGER - .debug( - "Deleting the seemingly duplicate source file {} because target file {} already exists in HDFS", - syslogFile.getPath(), hdfswritepath - ); - syslogFile.delete(); - throw new RuntimeException("File " + fileName + " already exists"); - } - else { - LOGGER.debug("Target file <{}> doesn't exist, proceeding normally.", hdfswritepath); - } - - Path path = new Path(syslogFile.getPath()); - fs.copyFromLocalFile(path, hdfswritepath); - LOGGER.debug("End Write file into hdfs"); - LOGGER.info("\nFile committed to HDFS, file writepath should be: <{}>\n", hdfswritepath); + //==== Create directory if not exists + Path workingDir = fs.getWorkingDirectory(); + // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. + Path newDirectoryPath = new Path(path); + if (!fs.exists(newDirectoryPath)) { + // Create new Directory + fs.mkdirs(newDirectoryPath); + LOGGER.info("Path <{}> created.", path); + } + //==== Write file + LOGGER.debug("Begin Write file into hdfs"); + //Create a path + Path hdfswritepath = new Path(newDirectoryPath.toString() + "/" + fileName); // filename should be set according to the requirements: 0.12345 where 0 is Kafka partition and 12345 is Kafka offset. 
+ if (fs.exists(hdfswritepath)) { + LOGGER + .debug( + "Deleting the seemingly duplicate source file {} because target file {} already exists in HDFS", + syslogFile.getPath(), hdfswritepath + ); + syslogFile.delete(); + throw new RuntimeException("File " + fileName + " already exists"); } - catch (IOException e) { - throw new RuntimeException(e); + else { + LOGGER.debug("Target file <{}> doesn't exist, proceeding normally.", hdfswritepath); } + + Path path = new Path(syslogFile.getPath()); + fs.copyFromLocalFile(path, hdfswritepath); + LOGGER.debug("End Write file into hdfs"); + LOGGER.info("\nFile committed to HDFS, file writepath should be: <{}>\n", hdfswritepath); } // try-with-resources handles closing the filesystem automatically. diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index e6b6048c..855c5975 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.metrics.*; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import org.apache.hadoop.fs.FileSystem; @@ -73,7 +73,7 @@ public class HdfsDataIngestion { private static final Logger LOGGER = LoggerFactory.getLogger(HdfsDataIngestion.class); - private final Config config; + private final ConfigurationImpl config; private final org.apache.kafka.clients.consumer.Consumer kafkaConsumer; private final List threads = new ArrayList<>(); private final Set activeTopics = new HashSet<>(); @@ -83,17 +83,17 @@ public class HdfsDataIngestion { private Map hdfsStartOffsets; private final FileSystem fs; - public HdfsDataIngestion(Config config) throws IOException { + public 
HdfsDataIngestion(ConfigurationImpl config) throws IOException { keepRunning = true; this.config = config; - Properties readerKafkaProperties = config.getKafkaConsumerProperties(); - this.numOfConsumers = config.getNumOfConsumers(); + Properties readerKafkaProperties = config.toKafkaConsumerProperties(); + this.numOfConsumers = Integer.parseInt(config.valueOf("numOfConsumers")); this.useMockKafkaConsumer = Boolean .parseBoolean(readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")); if (useMockKafkaConsumer) { this.kafkaConsumer = MockKafkaConsumerFactory.getConsumer(0); // A consumer used only for scanning the available topics to be allocated to consumers running in different threads (thus 0 as input parameter). // Initializing the FileSystem with minicluster. - String hdfsuri = config.getHdfsuri(); + String hdfsuri = config.valueOf("hdfsuri"); // ====== Init HDFS File System Object HdfsConfiguration conf = new HdfsConfiguration(); // Set FileSystem URI @@ -109,35 +109,48 @@ public HdfsDataIngestion(Config config) throws IOException { } else { this.kafkaConsumer = new KafkaConsumer<>( - config.getKafkaConsumerProperties(), + config.toKafkaConsumerProperties(), new ByteArrayDeserializer(), new ByteArrayDeserializer() ); // Initializing the FileSystem with kerberos. - String hdfsuri = config.getHdfsuri(); // Get from config. + String hdfsuri = config.valueOf("hdfsuri"); // Get from config. 
// set kerberos host and realm - System.setProperty("java.security.krb5.realm", config.getKerberosRealm()); - System.setProperty("java.security.krb5.kdc", config.getKerberosHost()); + System.setProperty("java.security.krb5.realm", config.valueOf("java.security.krb5.realm")); + System.setProperty("java.security.krb5.kdc", config.valueOf("java.security.krb5.kdc")); HdfsConfiguration conf = new HdfsConfiguration(); // enable kerberus - conf.set("hadoop.security.authentication", config.getHadoopAuthentication()); - conf.set("hadoop.security.authorization", config.getHadoopAuthorization()); - conf.set("hadoop.kerberos.keytab.login.autorenewal.enabled", config.getKerberosLoginAutorenewal()); + conf.set("hadoop.security.authentication", config.valueOf("hadoop.security.authentication")); + conf.set("hadoop.security.authorization", config.valueOf("hadoop.security.authorization")); + conf + .set( + "hadoop.kerberos.keytab.login.autorenewal.enabled", + config.valueOf("hadoop.kerberos.keytab.login.autorenewal.enabled") + ); conf.set("fs.defaultFS", hdfsuri); // Set FileSystem URI conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName()); // Maven stuff? conf.set("fs.file.impl", LocalFileSystem.class.getName()); // Maven stuff? 
/* hack for running locally with fake DNS records set this to true if overriding the host name in /etc/hosts*/ - conf.set("dfs.client.use.datanode.hostname", config.getKerberosTestMode()); + conf.set("dfs.client.use.datanode.hostname", config.valueOf("dfs.client.use.datanode.hostname")); /* server principal the kerberos principle that the namenode is using*/ - conf.set("dfs.namenode.kerberos.principal.pattern", config.getKerberosPrincipal()); + conf + .set( + "dfs.namenode.kerberos.principal.pattern", + config.valueOf("dfs.namenode.kerberos.principal.pattern") + ); // set sasl - conf.set("dfs.data.transfer.protection", config.getDfsDataTransferProtection()); - conf.set("dfs.encrypt.data.transfer.cipher.suites", config.getDfsEncryptDataTransferCipherSuites()); + conf.set("dfs.data.transfer.protection", config.valueOf("dfs.data.transfer.protection")); + conf + .set( + "dfs.encrypt.data.transfer.cipher.suites", + config.valueOf("dfs.encrypt.data.transfer.cipher.suites") + ); // set usergroup stuff UserGroupInformation.setConfiguration(conf); - UserGroupInformation.loginUserFromKeytab(config.getKerberosKeytabUser(), config.getKerberosKeytabPath()); + UserGroupInformation + .loginUserFromKeytab(config.valueOf("KerberosKeytabUser"), config.valueOf("KerberosKeytabPath")); // filesystem for HDFS access is set here fs = FileSystem.get(conf); } @@ -163,7 +176,7 @@ public void run() throws InterruptedException, IOException { } while (keepRunning) { - if ("kerberos".equals(config.getHadoopAuthentication())) { + if ("kerberos".equals(config.valueOf("hadoop.security.authentication"))) { UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); } LOGGER.debug("Scanning for threads"); @@ -226,7 +239,7 @@ private void createReader( private void topicScan(DurationStatistics durationStatistics, List topicCounters) { Map> listTopics = kafkaConsumer.listTopics(Duration.ofSeconds(60)); - Pattern topicsRegex = Pattern.compile(config.getQueueTopicPattern()); + Pattern 
topicsRegex = Pattern.compile(config.valueOf("queueTopicPattern")); // Find the topics available in Kafka based on given QueueTopicPattern, both active and in-active. Set foundTopics = new HashSet<>(); Map> foundPartitions = new HashMap<>(); @@ -238,7 +251,7 @@ private void topicScan(DurationStatistics durationStatistics, List } } if (foundTopics.isEmpty()) { - throw new IllegalStateException("Pattern <[" + config.getQueueTopicPattern() + "]> found no topics."); + throw new IllegalStateException("Pattern <[" + config.valueOf("queueTopicPattern") + "]> found no topics."); } // subtract currently active topics from found topics foundTopics.removeAll(activeTopics); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java index 9f647cf9..f5ca21b8 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.apache.kafka.clients.consumer.*; import org.slf4j.Logger; @@ -59,7 +59,7 @@ public class KafkaReader implements AutoCloseable { private final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class); - private final Config config; + private final ConfigurationImpl config; private final Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; private final ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl; @@ -69,7 +69,7 @@ public KafkaReader( Consumer kafkaConsumer, BatchDistributionImpl callbackFunction, ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl, - Config config + ConfigurationImpl config ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; @@ -110,7 +110,7 @@ KafkaRecord and other required data for HDFS storage are added 
to the input para // If no new kafka record batches is received for a while, use callbackFunction.accept() with empty recordOffsetObjectList to flush records that have already been committed in kafka to HDFS. long thisTime = Instant.now().toEpochMilli(); long ftook = thisTime - lastTimeCalled; - if (ftook > config.consumerTimeout()) { + if (ftook > Long.parseLong(config.valueOf("consumerTimeout"))) { callbackFunction.accept(recordOffsetObjectList); lastTimeCalled = Instant.now().toEpochMilli(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 9bceef6b..49b18dcf 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -46,8 +46,8 @@ package com.teragrep.cfe_39.consumers.kafka; import com.google.gson.JsonObject; -import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.queue.WritableQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,14 +62,14 @@ public class PartitionFileImpl implements PartitionFile { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionFileImpl.class); private final JsonObject topicPartition; - private final Config config; + private final ConfigurationImpl config; private final File syslogFile; private final List batchOffsets; private final PartitionRecordsImpl partitionRecords; - PartitionFileImpl(Config config, JsonObject topicPartition) throws IOException { + PartitionFileImpl(ConfigurationImpl config, JsonObject topicPartition) throws IOException { WritableQueue writableQueue = new WritableQueue( - config.getQueueDirectory(), + config.valueOf("queueDirectory"), topicPartition.get("topic").getAsString() + 
topicPartition.get("partition").getAsString() ); this.syslogFile = writableQueue.getNextWritableFile(); @@ -106,7 +106,7 @@ public void commitRecords() throws IOException { storedOffset = next.getOffset(); } // When the file size has gone above the maximum, commit the file into HDFS using the latest topic/partition/offset values as the filename and then delete the local avro-file. - if (config.getMaximumFileSize() < syslogFile.length()) { + if (Long.parseLong(config.valueOf("maximumFileSize")) < syslogFile.length()) { writeToHdfs(storedOffset); } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java index e877bf2c..e0b2b2c0 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java @@ -45,8 +45,8 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.Config; import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.rlo_06.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,9 +59,9 @@ public class PartitionRecordsImpl implements PartitionRecords { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionRecordsImpl.class); private final List kafkaRecordList; - private final Config config; + private final ConfigurationImpl config; - public PartitionRecordsImpl(Config config) { + public PartitionRecordsImpl(ConfigurationImpl config) { this.kafkaRecordList = new ArrayList<>(); this.config = config; } @@ -79,7 +79,7 @@ public List toSyslogRecordList() { syslogRecordList.add(next.toSyslogRecord()); } catch (ParseException e) { - if (config.getSkipNonRFC5424Records()) { + if (config.valueOf("skipNonRFC5424Records").equalsIgnoreCase("true")) { LOGGER .warn( "Skipping parsing a non RFC5424 record, record 
metadata: <{}>. Exception information: ", @@ -92,7 +92,7 @@ public List toSyslogRecordList() { } } catch (NullPointerException e) { - if (config.getSkipEmptyRFC5424Records()) { + if (config.valueOf("skipEmptyRFC5424Records").equalsIgnoreCase("true")) { LOGGER .warn( "Skipping parsing an empty RFC5424 record, record metadata: <{}>. Exception information: ", diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java index bae7ce67..b143ba67 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.serialization.ByteArrayDeserializer; @@ -59,14 +59,14 @@ public class ReadCoordinator implements Runnable { private static final Logger LOGGER = LoggerFactory.getLogger(ReadCoordinator.class); private final String queueTopic; - Config config; + ConfigurationImpl config; private final BatchDistributionImpl callbackFunction; private boolean run = true; private final Map hdfsStartOffsets; public ReadCoordinator( String queueTopic, - Config config, + ConfigurationImpl config, BatchDistributionImpl callbackFunction, Map hdfsStartOffsets ) { @@ -143,10 +143,10 @@ else if (Objects.equals(name, "testConsumerTopic2")) { @Override public void run() { boolean useMockKafkaConsumer = Boolean - .parseBoolean(config.getKafkaConsumerProperties().getProperty("useMockKafkaConsumer", "false")); + .parseBoolean(config.toKafkaConsumerProperties().getProperty("useMockKafkaConsumer", "false")); try ( KafkaReader kafkaReader = createKafkaReader( - config.getKafkaConsumerProperties(), queueTopic, 
callbackFunction, useMockKafkaConsumer + config.toKafkaConsumerProperties(), queueTopic, callbackFunction, useMockKafkaConsumer ) ) { while (run) { diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 4146f6a0..52b1fed6 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; @@ -82,7 +82,7 @@ public class BatchDistributionTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -92,12 +92,16 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } @@ -109,7 +113,7 @@ public void teardownMiniCluster() { }); hdfsCluster.shutdown(); FileUtil.fullyDelete(baseDir); - File queueDirectory = new File(config.getQueueDirectory()); + File queueDirectory = new File(config.valueOf("queueDirectory")); File[] files = queueDirectory.listFiles(); if (files[0].getName().equals("topicName0.1")) { files[0].delete(); @@ -320,7 +324,7 @@ record = new ConsumerRecord<>( // Assert that records 11-13 are present in local avro-file. 
- File queueDirectory = new File(config.getQueueDirectory()); + File queueDirectory = new File(config.valueOf("queueDirectory")); File[] files = queueDirectory.listFiles(); Assertions.assertEquals(1, files.length); @@ -338,9 +342,9 @@ record = new ConsumerRecord<>( // Assert that records 0-10 are present in HDFS - Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.10"))); - Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.10"); + Assertions.assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.10"))); + Path hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.10"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. @@ -390,9 +394,9 @@ record = new ConsumerRecord<>( List kafkaRecordListEmpty = new ArrayList<>(); output.accept(kafkaRecordListEmpty); - Assertions.assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.13"))); - hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.13"); + Assertions.assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.13"))); + hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.13"); //Init input stream FSDataInputStream inputStream2 = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. 
@@ -485,13 +489,13 @@ public void skipNonRFC5424DatabaseOutputTest() { kafkaRecordList.add(kafkaRecord3); output.accept(kafkaRecordList); output.accept(new ArrayList<>()); - Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.3"))); + Assertions.assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.3"))); // File in hdfs does not contain any empty records. // Assert that the file in hdfs contains the expected one record. - Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.3"); + Path hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.3"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. @@ -567,13 +571,13 @@ public void skipNullRFC5424DatabaseOutputTest() { kafkaRecordList.add(kafkaRecord3); output.accept(kafkaRecordList); output.accept(new ArrayList<>()); - Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.2"))); + Assertions.assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.2"))); // File in hdfs does not contain any records, but acts as a marker for kafka consumer offsets. // Assert that the file in hdfs contains the expected zero record. 
- Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.2"); + Path hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.2"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. @@ -654,12 +658,12 @@ record = new ConsumerRecord<>( kafkaRecordList.add(kafkaRecord); output.accept(kafkaRecordList); output.accept(new ArrayList<>()); - Assertions.assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.3"))); + Assertions.assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.3"))); // Assert that the file in hdfs contains the expected single record. - Path hdfsreadpath = new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.3"); + Path hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.3"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. 
diff --git a/src/test/java/com/teragrep/cfe_39/ConfigTest.java b/src/test/java/com/teragrep/cfe_39/ConfigTest.java index 7ebde247..89068f8a 100644 --- a/src/test/java/com/teragrep/cfe_39/ConfigTest.java +++ b/src/test/java/com/teragrep/cfe_39/ConfigTest.java @@ -47,6 +47,7 @@ import com.teragrep.cfe_39.configuration.Config; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,6 +60,7 @@ public class ConfigTest { private static final Logger LOGGER = LoggerFactory.getLogger(ConfigTest.class); + @Disabled @Test public void validConfigTest() { assertDoesNotThrow(() -> { @@ -75,6 +77,7 @@ public void validConfigTest() { }); } + @Disabled @Test public void brokenConfigTest() { // Set system properties to use the broken configuration. @@ -87,6 +90,7 @@ public void brokenConfigTest() { Assertions.assertEquals("hdfsuri not set", e.getMessage()); } + @Disabled @Test public void configEqualityTest() { assertDoesNotThrow(() -> { @@ -103,6 +107,7 @@ public void configEqualityTest() { }); } + @Disabled @Test public void configConstructorTest() { assertDoesNotThrow(() -> { diff --git a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java index f119f7ff..2d5d8121 100644 --- a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java @@ -49,19 +49,31 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.io.IOException; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; public class ConfigurationTest { @Test - public void configurationTest() throws IOException { - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - ConfigurationImpl configuration = new ConfigurationImpl().loadPropertiesFile(); - 
String s = configuration.valueOf("hdfsuri"); - Assertions.assertEquals("hdfs://localhost:45937/", s); - configuration = configuration.with("hdfsuri", "123456"); - s = configuration.valueOf("hdfsuri"); - Assertions.assertEquals("123456", s); + public void configurationTest() { + assertDoesNotThrow(() -> { + System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + ConfigurationImpl configuration = new ConfigurationImpl().loadPropertiesFile(); + String s = configuration.valueOf("hdfsuri"); + Assertions.assertEquals("hdfs://localhost:45937/", s); + configuration = configuration.with("hdfsuri", "123456"); + s = configuration.valueOf("hdfsuri"); + Assertions.assertEquals("123456", s); + }); + } + + @Test + public void configurationTest2() { + assertDoesNotThrow(() -> { + System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + ConfigurationImpl configuration = new ConfigurationImpl().loadPropertiesFile(); + configuration.toKafkaConsumerProperties(); + }); } } diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index ffc9227f..04623fed 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -47,7 +47,7 @@ import com.google.gson.JsonObject; import com.google.gson.JsonParser; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HDFSWrite; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -70,7 +70,7 @@ public class HdfsTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Start minicluster and initialize config. 
@@ -80,12 +80,16 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } @@ -103,11 +107,11 @@ public void teardownMiniCluster() { public void hdfsWriteTest() { // This test case is for testing the functionality of the HDFSWrite.java by writing pre-generated AVRO-files to the HDFS database and asserting the results are correct. 
assertDoesNotThrow(() -> { - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); String pathname = System.getProperty("user.dir") + "/src/test/resources/mockHdfsFiles/0.9"; java.nio.file.Path sourceFile = Paths.get(pathname); - java.nio.file.Path targetDir = Paths.get(config.getQueueDirectory()); + java.nio.file.Path targetDir = Paths.get(config.valueOf("queueDirectory")); java.nio.file.Path targetFile = targetDir.resolve(sourceFile.getFileName()); Assertions.assertFalse(targetFile.toFile().exists()); Files.copy(sourceFile, targetFile); @@ -122,16 +126,17 @@ public void hdfsWriteTest() { targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions - .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); pathname = System.getProperty("user.dir") + "/src/test/resources/mockHdfsFiles/0.13"; sourceFile = Paths.get(pathname); - targetDir = Paths.get(config.getQueueDirectory()); + targetDir = Paths.get(config.valueOf("queueDirectory")); targetFile = targetDir.resolve(sourceFile.getFileName()); Files.copy(sourceFile, targetFile); Assertions.assertTrue(targetFile.toFile().exists()); - avroFile = new File(config.getQueueDirectory() + "/0.13"); + avroFile = new File(config.valueOf("queueDirectory") + "/0.13"); recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":13}") .getAsJsonObject(); @@ -141,9 +146,11 @@ public void 
hdfsWriteTest() { targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions - .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); }); } @@ -151,11 +158,11 @@ public void hdfsWriteTest() { public void hdfsWriteExceptionTest() { // This test case is for testing the functionality of the HDFSWrite.java exception handling by trying to write the same file twice and asserting that the proper exception is thrown. 
assertDoesNotThrow(() -> { - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); String pathname = System.getProperty("user.dir") + "/src/test/resources/mockHdfsFiles/0.9"; java.nio.file.Path sourceFile = Paths.get(pathname); - java.nio.file.Path targetDir = Paths.get(config.getQueueDirectory()); + java.nio.file.Path targetDir = Paths.get(config.valueOf("queueDirectory")); java.nio.file.Path targetFile = targetDir.resolve(sourceFile.getFileName()); Assertions.assertFalse(targetFile.toFile().exists()); Files.copy(sourceFile, targetFile); @@ -170,12 +177,13 @@ public void hdfsWriteExceptionTest() { targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions - .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); Files.copy(sourceFile, targetFile); Assertions.assertTrue(targetFile.toFile().exists()); - avroFile = new File(config.getQueueDirectory() + "/0.9"); + avroFile = new File(config.valueOf("queueDirectory") + "/0.9"); recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":9}") .getAsJsonObject(); @@ -187,8 +195,9 @@ public void hdfsWriteExceptionTest() { targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions - .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - 
Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java index 296e7a4d..f9e81b47 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -72,7 +72,7 @@ public class Ingestion0FilesLowSizeTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion0FilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -82,12 +82,17 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 3000); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("maximumFileSize", "3000"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } @@ -111,8 +116,8 @@ public void ingestion0FilesLowSizeTest() { Maximum file size is set to 3,000 in the config. Empty HDFS database, 140 records in mock kafka consumer ready for ingestion. All 14 records for each 10 topic partitions are stored in two avro-files per partition based on MaximumFileSize.*/ assertDoesNotThrow(() -> { - Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. 
+ Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); Thread.sleep(10000); hdfsDataIngestion.run(); @@ -121,9 +126,9 @@ public void ingestion0FilesLowSizeTest() { // Assert that the kafka records were ingested correctly and the database holds the correct 140 records. // Check that the files were properly written to HDFS. - String hdfsuri = config.getHdfsuri(); + String hdfsuri = config.valueOf("hdfsuri"); - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // ====== Init HDFS File System Object Configuration conf = new Configuration(); // Set FileSystem URI @@ -143,20 +148,30 @@ public void ingestion0FilesLowSizeTest() { Assertions.assertTrue(fs.exists(newDirectoryPath)); // Assert that the kafka records were ingested correctly and the database holds the expected 20 files. - FileStatus[] fileStatuses = fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")); + FileStatus[] fileStatuses = fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")); Assertions - .assertEquals(10, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.10"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "1.10"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "2.10"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "3.10"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "4.10"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "5.10"))); - 
Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "6.10"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "7.10"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "8.10"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "9.10"))); + .assertEquals(10, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.10"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "1.10"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "2.10"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "3.10"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "4.10"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "5.10"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "6.10"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "7.10"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "8.10"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "9.10"))); LOGGER.debug("All expected files present in HDFS."); // Now Assert the files that were too small to be stored in HDFS. 
@@ -168,7 +183,7 @@ public void ingestion0FilesLowSizeTest() { for (String fileName : filenameList) { - String path2 = config.getQueueDirectory() + "/" + fileName; + String path2 = config.valueOf("queueDirectory") + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index 9302d036..73ac5541 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -70,7 +70,7 @@ public class Ingestion0FilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion0FilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -80,12 +80,17 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 30000); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("maximumFileSize", "30000"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } @@ -109,8 +114,8 @@ public void ingestion0FilesTest() { Maximum file size is set to 30,000 in the config. Empty HDFS database, 160 records in mock kafka consumer ready for ingestion. All 14 records for each 10 topic partitions are stored in a single avro-file per partition (2 skipped records per file).*/ assertDoesNotThrow(() -> { - Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. 
+ Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); Thread.sleep(10000); hdfsDataIngestion.run(); @@ -118,7 +123,7 @@ public void ingestion0FilesTest() { // Assert that the kafka records were ingested correctly and the database/temporary file holds the correct 140 records (20 broken records were skipped). assertDoesNotThrow(() -> { - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; Path newDirectoryPath = new Path(path); Assertions.assertTrue(fs.exists(newDirectoryPath)); @@ -134,7 +139,7 @@ public void ingestion0FilesTest() { // Assert that all the records are inside the temporary AVRO-files generated by PartitionFile objects during consumption. - File queueDirectory = new File(config.getQueueDirectory()); + File queueDirectory = new File(config.valueOf("queueDirectory")); File[] files = queueDirectory.listFiles(); Assertions.assertEquals(10, files.length); for (File file : files) { @@ -144,7 +149,7 @@ public void ingestion0FilesTest() { int partitionCounter = 0; for (String fileName : filenameList) { - String path2 = config.getQueueDirectory() + "/" + fileName; + String path2 = config.valueOf("queueDirectory") + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index 001baa29..e2580697 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import 
com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -76,7 +76,7 @@ public class Ingestion1Old1NewFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion1Old1NewFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -86,15 +86,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 30000); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("maximumFileSize", "30000"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts pre-made avro-files to HDFS where one file has new timestamp and other old, which are normally generated during data ingestion from mock kafka consumer. 
- String path = config.getHdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -149,25 +154,30 @@ public void ingestion1Old1NewFileTest() { assertDoesNotThrow(() -> { // Assert the known starting state. - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); Assertions - .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); - Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. - Assertions.assertTrue((System.currentTimeMillis() - config.getPruneOffset()) > 157784760000L); + .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. 
+ Assertions + .assertTrue((System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset"))) > 157784760000L); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); Thread.sleep(10000); hdfsDataIngestion.run(); // Assert that the kafka records were ingested and pruned correctly and the database holds only the expected 1 file. Assertions - .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); // Assert the avro-files that were too small to be stored in HDFS. - String path1 = config.getQueueDirectory() + "/" + "testConsumerTopic0.1"; + String path1 = config.valueOf("queueDirectory") + "/" + "testConsumerTopic0.1"; File avroFile1 = new File(path1); Assertions.assertTrue(avroFile1.exists()); DatumReader datumReader1 = new SpecificDatumReader<>(SyslogRecord.class); @@ -181,7 +191,7 @@ public void ingestion1Old1NewFileTest() { filenameList.add("testConsumerTopic" + i + "." 
+ 1); } for (String fileName : filenameList) { - String path2 = config.getQueueDirectory() + "/" + fileName; + String path2 = config.valueOf("queueDirectory") + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index e40935cb..f2dcb284 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -74,7 +74,7 @@ public class Ingestion2NewFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion2NewFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -84,15 +84,21 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 30000); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("maximumFileSize", "30000"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -143,24 +149,28 @@ public void ingestion2NewFilesTest() { */ assertDoesNotThrow(() -> { // Assert the known starting state. 
- Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); + Assertions + .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); - Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); Thread.sleep(10000); hdfsDataIngestion.run(); // Assert that the kafka records were ingested correctly and the database holds the expected 2 files. 
Assertions - .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); // Assert the avro-files that were too small to be stored in HDFS. - String path1 = config.getQueueDirectory() + "/" + "testConsumerTopic0.1"; + String path1 = config.valueOf("queueDirectory") + "/" + "testConsumerTopic0.1"; File avroFile1 = new File(path1); Assertions.assertTrue(avroFile1.exists()); DatumReader datumReader1 = new SpecificDatumReader<>(SyslogRecord.class); @@ -174,7 +184,7 @@ public void ingestion2NewFilesTest() { filenameList.add("testConsumerTopic" + partition + "." 
+ 1); } for (String fileName : filenameList) { - String path2 = config.getQueueDirectory() + "/" + fileName; + String path2 = config.valueOf("queueDirectory") + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index 69888c0c..b56baebd 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -76,7 +76,7 @@ public class Ingestion2OldFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion2OldFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -86,15 +86,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 30000); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("maximumFileSize", "30000"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -149,23 +154,26 @@ public void ingestion2OldFilesTest() { assertDoesNotThrow(() -> { // Assert the known starting state. 
- Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); Assertions - .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); - Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct. - Assertions.assertTrue((System.currentTimeMillis() - config.getPruneOffset()) > 157784760000L); + .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. + Assertions + .assertTrue((System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset"))) > 157784760000L); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); Thread.sleep(10000); hdfsDataIngestion.run(); // Assert that the kafka records were ingested and pruned correctly and the database doesn't hold any files. Assertions - .assertEquals(0, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); + .assertEquals(0, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); // Assert the avro-files that were too small to be stored in HDFS. 
- String path1 = config.getQueueDirectory() + "/" + "testConsumerTopic0.1"; + String path1 = config.valueOf("queueDirectory") + "/" + "testConsumerTopic0.1"; File avroFile1 = new File(path1); Assertions.assertTrue(avroFile1.exists()); DatumReader datumReader1 = new SpecificDatumReader<>(SyslogRecord.class); @@ -179,7 +187,7 @@ public void ingestion2OldFilesTest() { filenameList.add("testConsumerTopic" + i + "." + 1); } for (String fileName : filenameList) { - String path2 = config.getQueueDirectory() + "/" + fileName; + String path2 = config.valueOf("queueDirectory") + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index d4faa5f5..e756cc63 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.ReadCoordinator; import com.teragrep.cfe_39.metrics.DurationStatistics; @@ -74,7 +74,7 @@ public class KafkaConsumerTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -84,12 +84,17 @@ public void startMiniCluster() { // Set system properties to use the valid configuration with skipping of broken records disabled. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/", 30000); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("maximumFileSize", "30000"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } @@ -153,7 +158,7 @@ public void readCoordinatorTest2Threads() { filenameList.add("testConsumerTopic" + i + "." + 1); } for (String fileName : filenameList) { - String path2 = config.getQueueDirectory() + "/" + fileName; + String path2 = config.valueOf("queueDirectory") + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); @@ -205,7 +210,7 @@ public void readCoordinatorTest1Thread() { filenameList.add("testConsumerTopic" + i + "." 
+ 1); } for (String fileName : filenameList) { - String path2 = config.getQueueDirectory() + "/" + fileName; + String path2 = config.valueOf("queueDirectory") + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 49d52217..0f33e7fe 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; @@ -82,7 +82,7 @@ public class ProcessingFailureTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -92,12 +92,16 @@ public void startMiniCluster() { // Set system properties to use the valid configuration with skipping of broken records disabled. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/failProcessing.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } @@ -148,14 +152,14 @@ public void failNonRFC5424DatabaseOutputTest() { recordOffsetObjectList.add(recordOffsetObject); Exception e = Assertions.assertThrows(Exception.class, () -> output.accept(recordOffsetObjectList)); Assertions.assertEquals("com.teragrep.rlo_06.PriorityParseException: PRIORITY < missing", e.getMessage()); - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"))); + Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.1"))); // No files stored to hdfs. // Assert the local avro file that should e empty. 
- File queueDirectory = new File(config.getQueueDirectory()); + File queueDirectory = new File(config.valueOf("queueDirectory")); File[] files = queueDirectory.listFiles(); Assertions.assertEquals(1, files.length); - String path2 = config.getQueueDirectory() + "/" + "topicName0.1"; + String path2 = config.valueOf("queueDirectory") + "/" + "topicName0.1"; File avroFile = new File(path2); Assertions.assertTrue(avroFile.exists()); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); @@ -208,14 +212,14 @@ public void failNullRFC5424DatabaseOutputTest() { "java.lang.NullPointerException: Cannot read the array length because \"buf\" is null", e.getMessage() ); - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "topicName" + "/" + "0.1"))); + Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.1"))); // No files stored to hdfs. // Assert the local avro file that should e empty. - File queueDirectory = new File(config.getQueueDirectory()); + File queueDirectory = new File(config.valueOf("queueDirectory")); File[] files = queueDirectory.listFiles(); Assertions.assertEquals(1, files.length); - String path2 = config.getQueueDirectory() + "/" + "topicName0.1"; + String path2 = config.valueOf("queueDirectory") + "/" + "topicName0.1"; File avroFile = new File(path2); Assertions.assertTrue(avroFile.exists()); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); diff --git a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java index 1d026961..1398663b 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -68,7 +68,7 @@ public class PruningNoFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningNoFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Start minicluster and initialize config. @@ -78,12 +78,16 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } @@ -100,15 +104,15 @@ public void teardownMiniCluster() { public void noFiles() { // This test case is for testing the functionality of the HDFSPrune.java when the target database is empty. 
assertDoesNotThrow(() -> { - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); Assertions - .assertEquals(0, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); + .assertEquals(0, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); int deleted = hdfsPrune.prune(); Assertions.assertEquals(0, deleted); Assertions - .assertEquals(0, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); + .assertEquals(0, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java index 49ad6e13..a855fdfa 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -72,7 +72,7 @@ public class PruningOneNewFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningOneNewFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. 
@@ -82,15 +82,19 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts a single pre-made avro-file with a new timestamp to HDFS, which is normally generated during data ingestion from mock kafka consumer. - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -131,21 +135,24 @@ public void teardownMiniCluster() { @Test public void oneNewFileTest() { // This test case is for testing the functionality of the HDFSPrune.java when the database holds a file with a timestamp that shouldn't trigger pruning of old files. 
- Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. - Assertions.assertTrue(System.currentTimeMillis() - config.getPruneOffset() > 157784760000L); + Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. + Assertions + .assertTrue(System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset")) > 157784760000L); assertDoesNotThrow(() -> { - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); Assertions - .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); int deleted = hdfsPrune.prune(); Assertions.assertEquals(0, deleted); // Also check with HDFS access if expected files still exist. 
Assertions - .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java index a9898cb6..ce65bcd7 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -72,7 +72,7 @@ public class PruningOneOldFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningOneOldFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -82,15 +82,19 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts a single pre-made avro-file with an olf timestamp to HDFS, which is normally generated during data ingestion from mock kafka consumer. - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -131,21 +135,24 @@ public void teardownMiniCluster() { @Test public void oneOldFileTest() { // This test case is for testing the functionality of the HDFSPrune.java when the database holds a file with a timestamp that should trigger pruning of old files. - Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. 
- Assertions.assertTrue(System.currentTimeMillis() - config.getPruneOffset() > 157784760000L); + Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. + Assertions + .assertTrue(System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset")) > 157784760000L); assertDoesNotThrow(() -> { - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); Assertions - .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); int deleted = hdfsPrune.prune(); Assertions.assertEquals(1, deleted); // Also check with HDFS access if expected files still exist. 
Assertions - .assertEquals(0, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(0, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java index 007959c4..31dfce14 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -72,7 +72,7 @@ public class PruningOneOldOneNewFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningOneOldOneNewFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -82,16 +82,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); /* Inserts pre-made avro-files to HDFS, which are normally generated during data ingestion from mock kafka consumer. One file has new timestamp and another old timestamp.*/ - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -135,23 +139,28 @@ public void teardownMiniCluster() { public void oneOldOneNewFileTest() { /* This test case is for testing the functionality of the HDFSPrune.java when the database holds a file with a timestamp that shouldn't trigger pruning of old files and another file that should trigger the pruning. 
The file with newer timestamp is ignored while the older is deleted from the database.*/ - Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. - Assertions.assertTrue(System.currentTimeMillis() - config.getPruneOffset() > 157784760000L); + Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. + Assertions + .assertTrue(System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset")) > 157784760000L); assertDoesNotThrow(() -> { - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); Assertions - .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); int deleted = hdfsPrune.prune(); Assertions.assertEquals(1, deleted); // Also check with HDFS access if expected files still exist. 
Assertions - .assertEquals(1, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java index df6916f4..fa63cc0d 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -72,7 +72,7 @@ public class PruningTwoNewFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningTwoNewFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -82,16 +82,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -133,21 +137,25 @@ public void teardownMiniCluster() { @Test public void twoNewFilesTest() { // This test case is for testing the functionality of the HDFSPrune.java when the database holds two files with a timestamp that shouldn't trigger pruning of old files. - Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. 
+ Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. assertDoesNotThrow(() -> { Assertions - .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); int deleted = hdfsPrune.prune(); Assertions.assertEquals(0, deleted); // Also check with HDFS access if expected files still exist. 
Assertions - .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java index f5eb5c3b..2a089fa8 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -72,7 +72,7 @@ public class PruningTwoOldFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningTwoOldFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static Config config; + private static ConfigurationImpl config; private FileSystem fs; // Prepares known state for testing. @@ -82,16 +82,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = new Config("hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - fs = new TestFileSystemFactory().create(config.getHdfsuri()); + config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. - String path = config.getHdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -133,24 +137,28 @@ public void teardownMiniCluster() { @Test public void twoOldFilesTest() { // This test case is for testing the functionality of the HDFSPrune.java when the database holds two files with a timestamp that should trigger pruning of old files. - Assertions.assertTrue(config.getPruneOffset() >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. 
- Assertions.assertTrue(System.currentTimeMillis() - config.getPruneOffset() > 157784760000L); + Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. + Assertions + .assertTrue(System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset")) > 157784760000L); assertDoesNotThrow(() -> { - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); Assertions - .assertEquals(2, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions.assertTrue(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); int deleted = hdfsPrune.prune(); Assertions.assertEquals(2, deleted); // Also check with HDFS access if expected files still exist. 
Assertions - .assertEquals(0, fs.listStatus(new Path(config.getHdfsPath() + "/" + "testConsumerTopic")).length); - Assertions.assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(0, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions + .assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertFalse(fs.exists(new Path(config.getHdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java index 1649fa2b..fba0b547 100644 --- a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java +++ b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.consumers.kafka.SyslogAvroWriter; import org.apache.avro.file.DataFileReader; @@ -65,7 +65,7 @@ public class SyslogAvroWriterTest { - private static Config config; + private static ConfigurationImpl config; // Prepares known state for testing. @BeforeEach @@ -74,14 +74,18 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new Config(); + config = new ConfigurationImpl().loadPropertiesFile(); + config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config = config + .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + config.configureLogging(); }); } // Teardown the minicluster @AfterEach public void teardownMiniCluster() { - File queueDirectory = new File(config.getQueueDirectory()); + File queueDirectory = new File(config.valueOf("queueDirectory")); File[] files = queueDirectory.listFiles(); if (files[0].getName().equals("topicName0.1")) { files[0].delete(); @@ -93,9 +97,9 @@ public void writeTest() { assertDoesNotThrow(() -> { - File queueDirectory = new File(config.getQueueDirectory()); + File queueDirectory = new File(config.valueOf("queueDirectory")); - File syslogFile = new File(config.getQueueDirectory() + File.separator + "topicName0.1"); + File syslogFile = new File(config.valueOf("queueDirectory") + File.separator + "topicName0.1"); ConsumerRecord record0 = new ConsumerRecord<>( "topicName", diff --git a/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java b/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java index e03c158e..bf33648d 100644 --- a/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java +++ b/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.Config; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -56,7 +56,7 @@ // Helper class for creating FileSystem objects. 
public class TestMiniClusterFactory { - public MiniDFSCluster create(Config config, File baseDir) throws IOException { + public MiniDFSCluster create(ConfigurationImpl config, File baseDir) throws IOException { MiniDFSCluster hdfsCluster; // Create a HDFS miniCluster Configuration conf = new Configuration(); diff --git a/src/test/resources/broken.application.properties b/src/test/resources/broken.application.properties index 433eee07..586601f0 100644 --- a/src/test/resources/broken.application.properties +++ b/src/test/resources/broken.application.properties @@ -1,27 +1,33 @@ +# Kafka security configuration file +java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas +# Logger settings +log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ # Number of consumers created to the consumer groups numOfConsumers=2 # Kafka bootstrap servers -consumer.bootstrap.servers=test +bootstrap.servers=test # Offset, should not be touched -consumer.auto.offset.reset=earliest +auto.offset.reset=earliest # Autocommit, should not be touched -consumer.enable.auto.commit=false +enable.auto.commit=false # Consumer group id, this is to track the progress of reading hte topic -consumer.group.id=cfe_39 +group.id=cfe_39 # Used security protocol and mechanism -consumer.security.protocol=SASL_PLAINTEXT -consumer.sasl.mechanism=PLAIN +security.protocol=SASL_PLAINTEXT +sasl.mechanism=PLAIN # Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger -consumer.max.poll.records=500 +max.poll.records=500 # How much data can be fetched in one go -consumer.fetch.max.bytes=1073741820 +fetch.max.bytes=1073741820 # How long for request before timing out. Note that too big max poll records size can cause this to trigger -consumer.request.timeout.ms=300000 -consumer.max.poll.interval.ms=300000 -# For testing only, remove for prod. 
-consumer.useMockKafkaConsumer=true +request.timeout.ms=300000 +max.poll.interval.ms=300000 +# For testing only +useMockKafkaConsumer=true +# Directory where AVRO files are constructed for HDFS +queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # The maximum file size for AVRO-files that are to be stored in HDFS database. maximumFileSize=3000 # Boolean for deciding if records not in RFC5424 should be skipped or not. @@ -30,6 +36,8 @@ skipNonRFC5424Records=true skipEmptyRFC5424Records=true # HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 157784760000L pruneOffset=157784760000 +# HDFS path +hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ # Kerberos java.security.krb5.kdc=test java.security.krb5.realm=test @@ -41,4 +49,6 @@ KerberosKeytabPath=test dfs.client.use.datanode.hostname=false kerberosLoginAutorenewal=true dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test \ No newline at end of file +dfs.encrypt.data.transfer.cipher.suites=test +# timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS +consumerTimeout=300000 \ No newline at end of file diff --git a/src/test/resources/failProcessing.application.properties b/src/test/resources/failProcessing.application.properties index 55bc98d2..bdaf3507 100644 --- a/src/test/resources/failProcessing.application.properties +++ b/src/test/resources/failProcessing.application.properties @@ -1,27 +1,33 @@ +# Kafka security configuration file +java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas +# Logger settings +log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ # Number of consumers created to the consumer groups numOfConsumers=2 # Kafka bootstrap servers -consumer.bootstrap.servers=test +bootstrap.servers=test # Offset, should not be touched -consumer.auto.offset.reset=earliest 
+auto.offset.reset=earliest # Autocommit, should not be touched -consumer.enable.auto.commit=false +enable.auto.commit=false # Consumer group id, this is to track the progress of reading hte topic -consumer.group.id=cfe_39 +group.id=cfe_39 # Used security protocol and mechanism -consumer.security.protocol=SASL_PLAINTEXT -consumer.sasl.mechanism=PLAIN +security.protocol=SASL_PLAINTEXT +sasl.mechanism=PLAIN # Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger -consumer.max.poll.records=500 +max.poll.records=500 # How much data can be fetched in one go -consumer.fetch.max.bytes=1073741820 +fetch.max.bytes=1073741820 # How long for request before timing out. Note that too big max poll records size can cause this to trigger -consumer.request.timeout.ms=300000 -consumer.max.poll.interval.ms=300000 -# For testing only, remove for prod. -consumer.useMockKafkaConsumer=true +request.timeout.ms=300000 +max.poll.interval.ms=300000 +# For testing only +useMockKafkaConsumer=true +# Directory where AVRO files are constructed for HDFS +queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # The maximum file size for AVRO-files that are to be stored in HDFS database. maximumFileSize=3000 # Boolean for deciding if records not in RFC5424 should be skipped or not. 
@@ -32,6 +38,8 @@ skipEmptyRFC5424Records=false pruneOffset=157784760000 # HDFS uri hdfsuri=hdfs://localhost:45937/ +# HDFS path +hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ # Kerberos java.security.krb5.kdc=test java.security.krb5.realm=test @@ -43,4 +51,6 @@ KerberosKeytabPath=test dfs.client.use.datanode.hostname=false kerberosLoginAutorenewal=true dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test \ No newline at end of file +dfs.encrypt.data.transfer.cipher.suites=test +# timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS +consumerTimeout=300000 \ No newline at end of file diff --git a/src/test/resources/largeFile.application.properties b/src/test/resources/largeFile.application.properties index b6ffbc3a..33162ea1 100644 --- a/src/test/resources/largeFile.application.properties +++ b/src/test/resources/largeFile.application.properties @@ -1,27 +1,33 @@ +# Kafka security configuration file +java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas +# Logger settings +log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ # Number of consumers created to the consumer groups numOfConsumers=2 # Kafka bootstrap servers -consumer.bootstrap.servers=test +bootstrap.servers=test # Offset, should not be touched -consumer.auto.offset.reset=earliest +auto.offset.reset=earliest # Autocommit, should not be touched -consumer.enable.auto.commit=false +enable.auto.commit=false # Consumer group id, this is to track the progress of reading hte topic -consumer.group.id=cfe_39 +group.id=cfe_39 # Used security protocol and mechanism -consumer.security.protocol=SASL_PLAINTEXT -consumer.sasl.mechanism=PLAIN +security.protocol=SASL_PLAINTEXT +sasl.mechanism=PLAIN # Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger -consumer.max.poll.records=500 
+max.poll.records=500 # How much data can be fetched in one go -consumer.fetch.max.bytes=1073741820 +fetch.max.bytes=1073741820 # How long for request before timing out. Note that too big max poll records size can cause this to trigger -consumer.request.timeout.ms=300000 -consumer.max.poll.interval.ms=300000 -# For testing only, remove for prod. -consumer.useMockKafkaConsumer=true +request.timeout.ms=300000 +max.poll.interval.ms=300000 +# For testing only +useMockKafkaConsumer=true +# Directory where AVRO files are constructed for HDFS +queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # The maximum file size for AVRO-files that are to be stored in HDFS database. maximumFileSize=3000000 # Boolean for deciding if records not in RFC5424 should be skipped or not. @@ -32,6 +38,8 @@ skipEmptyRFC5424Records=true pruneOffset=157784760000 # HDFS uri hdfsuri=hdfs://localhost:45937/ +# HDFS path +hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ # Kerberos java.security.krb5.kdc=test java.security.krb5.realm=test @@ -43,4 +51,6 @@ KerberosKeytabPath=test dfs.client.use.datanode.hostname=false kerberosLoginAutorenewal=true dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test \ No newline at end of file +dfs.encrypt.data.transfer.cipher.suites=test +# timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS +consumerTimeout=300000 \ No newline at end of file diff --git a/src/test/resources/valid.application.properties b/src/test/resources/valid.application.properties index acbcf93d..19ea1b26 100644 --- a/src/test/resources/valid.application.properties +++ b/src/test/resources/valid.application.properties @@ -1,27 +1,33 @@ +# Kafka security configuration file +java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas +# Logger settings +log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ # Number of consumers created to 
the consumer groups numOfConsumers=2 # Kafka bootstrap servers -consumer.bootstrap.servers=test +bootstrap.servers=test # Offset, should not be touched -consumer.auto.offset.reset=earliest +auto.offset.reset=earliest # Autocommit, should not be touched -consumer.enable.auto.commit=false +enable.auto.commit=false # Consumer group id, this is to track the progress of reading hte topic -consumer.group.id=cfe_39 +group.id=cfe_39 # Used security protocol and mechanism -consumer.security.protocol=SASL_PLAINTEXT -consumer.sasl.mechanism=PLAIN +security.protocol=SASL_PLAINTEXT +sasl.mechanism=PLAIN # Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger -consumer.max.poll.records=500 +max.poll.records=500 # How much data can be fetched in one go -consumer.fetch.max.bytes=1073741820 +fetch.max.bytes=1073741820 # How long for request before timing out. Note that too big max poll records size can cause this to trigger -consumer.request.timeout.ms=300000 -consumer.max.poll.interval.ms=300000 -# For testing only, remove for prod. -consumer.useMockKafkaConsumer=true +request.timeout.ms=300000 +max.poll.interval.ms=300000 +# For testing only +useMockKafkaConsumer=true +# Directory where AVRO files are constructed for HDFS +queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # The maximum file size for AVRO-files that are to be stored in HDFS database. maximumFileSize=3000 # Boolean for deciding if records not in RFC5424 should be skipped or not. 
@@ -32,6 +38,8 @@ skipEmptyRFC5424Records=true pruneOffset=157784760000 # HDFS uri hdfsuri=hdfs://localhost:45937/ +# HDFS path +hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ # Kerberos java.security.krb5.kdc=test java.security.krb5.realm=test @@ -43,4 +51,6 @@ KerberosKeytabPath=test dfs.client.use.datanode.hostname=false kerberosLoginAutorenewal=true dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test \ No newline at end of file +dfs.encrypt.data.transfer.cipher.suites=test +# timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS +consumerTimeout=300000 \ No newline at end of file From 990c65af64731e7b4af6231dfb2cc659638e934a Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 6 Sep 2024 15:41:54 +0300 Subject: [PATCH 41/77] Removed now obsolete Config.java and ConfigTest.java files. --- .../teragrep/cfe_39/configuration/Config.java | 316 ------------------ .../java/com/teragrep/cfe_39/ConfigTest.java | 124 ------- 2 files changed, 440 deletions(-) delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/Config.java delete mode 100644 src/test/java/com/teragrep/cfe_39/ConfigTest.java diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Config.java b/src/main/java/com/teragrep/cfe_39/configuration/Config.java deleted file mode 100644 index f8bceaef..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/Config.java +++ /dev/null @@ -1,316 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. 
- */ -package com.teragrep.cfe_39.configuration; - -import org.apache.logging.log4j.core.config.Configurator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Enumeration; -import java.util.Properties; - -public class Config { - - private final String queueTopicPattern; - private final Properties kafkaConsumerProperties; - private static final Logger LOGGER = LoggerFactory.getLogger(Config.class); - private final String hdfsPath; - private final String hdfsuri; - private final String queueDirectory; - private final String kerberosHost; - private final String kerberosRealm; - private final String kerberosPrincipal; - private final String hadoopAuthentication; - private final String hadoopAuthorization; - private final String kerberosKeytabUser; - private final String kerberosKeytabPath; - private final String kerberosLoginAutorenewal; - private final String kerberosTestMode; - private final long maximumFileSize; - private final int numOfConsumers; - private final long pruneOffset; - private final boolean skipNonRFC5424Records; - private final boolean skipEmptyRFC5424Records; - private final String dfsDataTransferProtection; - private final String dfsEncryptDataTransferCipherSuites; - private final long consumerTimeout; - - public Config() throws IOException { - this("", 0); - } - - public Config(long maximumFileSize) throws IOException { - this("", maximumFileSize); - } - - public Config(String hdfsuri) throws IOException { - this(hdfsuri, 0); - } - - public Config(String hdfsuri, long maximumFileSize) throws IOException { - Properties properties = new Properties(); - Path configPath = Paths - .get(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); - LOGGER.info("Loading application config <[{}]>", configPath.toAbsolutePath()); 
- - try (InputStream inputStream = Files.newInputStream(configPath)) { - properties.load(inputStream); - LOGGER.debug("Got configuration: <{}>", properties); - } - - // HDFS - this.hdfsPath = properties.getProperty("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); - if (hdfsuri.isEmpty() || hdfsuri == null) { - this.hdfsuri = properties.getProperty("hdfsuri"); - } - else { - this.hdfsuri = hdfsuri; - } - if (this.hdfsuri == null) { - throw new IllegalArgumentException("hdfsuri not set"); - } - - // HDFS pruning - this.pruneOffset = Long.parseLong(properties.getProperty("pruneOffset", "172800000")); - if (this.pruneOffset <= 0) { - throw new IllegalArgumentException("pruneOffset must be set to >0, got " + pruneOffset); - } - - // AVRO - this.queueDirectory = properties.getProperty("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - if (maximumFileSize > 0) { - this.maximumFileSize = maximumFileSize; - } - else { - this.maximumFileSize = Long.parseLong(properties.getProperty("maximumFileSize", "60800000")); - if (this.maximumFileSize <= 0) { - throw new IllegalArgumentException("maximumFileSize must be set to >0, got " + this.maximumFileSize); - } - } - - // kerberos - this.kerberosHost = properties.getProperty("java.security.krb5.kdc"); - if (this.kerberosHost == null) { - throw new IllegalArgumentException("kerberosHost not set"); - } - this.kerberosRealm = properties.getProperty("java.security.krb5.realm"); - if (this.kerberosRealm == null) { - throw new IllegalArgumentException("kerberosRealm not set"); - } - this.hadoopAuthentication = properties.getProperty("hadoop.security.authentication"); - if (this.hadoopAuthentication == null) { - throw new IllegalArgumentException("hadoopAuthentication not set"); - } - this.hadoopAuthorization = properties.getProperty("hadoop.security.authorization"); - if (this.hadoopAuthorization == null) { - throw new IllegalArgumentException("hadoopAuthorization not set"); - } - this.kerberosPrincipal = 
properties.getProperty("dfs.namenode.kerberos.principal.pattern"); - if (this.kerberosPrincipal == null) { - throw new IllegalArgumentException("kerberosPrincipal not set"); - } - this.kerberosKeytabUser = properties.getProperty("KerberosKeytabUser"); - if (this.kerberosKeytabUser == null) { - throw new IllegalArgumentException("kerberosKeytabUser not set"); - } - this.kerberosKeytabPath = properties.getProperty("KerberosKeytabPath"); - if (this.kerberosKeytabPath == null) { - throw new IllegalArgumentException("kerberosKeytabPath not set"); - } - this.kerberosLoginAutorenewal = properties.getProperty("kerberosLoginAutorenewal"); - if (this.kerberosLoginAutorenewal == null) { - throw new IllegalArgumentException("kerberosLoginAutorenewal not set"); - } - this.kerberosTestMode = properties.getProperty("dfs.client.use.datanode.hostname", "false"); - - this.dfsDataTransferProtection = properties.getProperty("dfs.data.transfer.protection"); - if (this.dfsDataTransferProtection == null) { - throw new IllegalArgumentException("dfsDataTransferProtection not set"); - } - this.dfsEncryptDataTransferCipherSuites = properties.getProperty("dfs.encrypt.data.transfer.cipher.suites"); - if (this.dfsEncryptDataTransferCipherSuites == null) { - throw new IllegalArgumentException("dfsEncryptDataTransferCipherSuites not set"); - } - - // kafka - this.queueTopicPattern = properties.getProperty("queueTopicPattern", "^.*$"); - this.numOfConsumers = Integer.parseInt(properties.getProperty("numOfConsumers", "1")); - this.consumerTimeout = Long.parseLong(properties.getProperty("pruneOffset", "300000")); - - // skip non RFC5424 records - this.skipNonRFC5424Records = properties.getProperty("skipNonRFC5424Records", "false").equalsIgnoreCase("true"); - - // skip empty RFC5424 records - this.skipEmptyRFC5424Records = properties - .getProperty("skipEmptyRFC5424Records", "false") - .equalsIgnoreCase("true"); - - this.kafkaConsumerProperties = loadSubProperties(properties, "consumer."); - String 
loginConfig = properties - .getProperty("java.security.auth.login.config", System.getProperty("user.dir") + "/rpm/resources/config.jaas"); - if (loginConfig == null) { - throw new IOException("Property java.security.auth.login.config does not exist"); - } - if (!(new File(loginConfig)).isFile()) { - throw new IOException("File '" + loginConfig + "' set by java.security.auth.login.config does not exist"); - } - - // Just for loggers to work - Path log4j2Config = Paths - .get(properties.getProperty("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties")); - LOGGER.info("Loading log4j2 config from <[{}]>", log4j2Config.toRealPath()); - Configurator.reconfigure(log4j2Config.toUri()); - } - - private Properties loadSubProperties(Properties properties, String prefix) { - Properties subProperties = new Properties(); - - Enumeration keys = properties.keys(); - while (keys.hasMoreElements()) { - String key = String.valueOf(keys.nextElement()); - if (key.startsWith(prefix)) { - String value = properties.getProperty(key); - String subKey = key.replaceFirst(prefix, ""); - subProperties.put(subKey, value); - } - } - return subProperties; - } - - public String getHdfsPath() { - return hdfsPath; - } - - public String getHdfsuri() { - return hdfsuri; - } - - public String getQueueDirectory() { - return queueDirectory; - } - - public String getQueueTopicPattern() { - return queueTopicPattern; - } - - public Properties getKafkaConsumerProperties() { - return kafkaConsumerProperties; - } - - public String getKerberosHost() { - return kerberosHost; - } - - public String getKerberosRealm() { - return kerberosRealm; - } - - public String getKerberosPrincipal() { - return kerberosPrincipal; - } - - public String getHadoopAuthentication() { - return hadoopAuthentication; - } - - public String getHadoopAuthorization() { - return hadoopAuthorization; - } - - public String getKerberosKeytabUser() { - return kerberosKeytabUser; - } - - public String 
getKerberosKeytabPath() { - return kerberosKeytabPath; - } - - public String getKerberosTestMode() { - return kerberosTestMode; - } - - public long getMaximumFileSize() { - return maximumFileSize; - } - - public int getNumOfConsumers() { - return numOfConsumers; - } - - public long getPruneOffset() { - return pruneOffset; - } - - public boolean getSkipNonRFC5424Records() { - return skipNonRFC5424Records; - } - - public boolean getSkipEmptyRFC5424Records() { - return skipEmptyRFC5424Records; - } - - public String getKerberosLoginAutorenewal() { - return kerberosLoginAutorenewal; - } - - public String getDfsDataTransferProtection() { - return dfsDataTransferProtection; - } - - public String getDfsEncryptDataTransferCipherSuites() { - return dfsEncryptDataTransferCipherSuites; - } - - public long consumerTimeout() { - return consumerTimeout; - } -} diff --git a/src/test/java/com/teragrep/cfe_39/ConfigTest.java b/src/test/java/com/teragrep/cfe_39/ConfigTest.java deleted file mode 100644 index 89068f8a..00000000 --- a/src/test/java/com/teragrep/cfe_39/ConfigTest.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.cfe_39; - -import com.teragrep.cfe_39.configuration.Config; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Properties; - -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; - -public class ConfigTest { - - private static final Logger LOGGER = LoggerFactory.getLogger(ConfigTest.class); - - @Disabled - @Test - public void validConfigTest() { - assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - Config config = new Config(); - Properties readerKafkaProperties = config.getKafkaConsumerProperties(); - // Test extracting useMockKafkaConsumer value from config. - boolean useMockKafkaConsumer = Boolean - .parseBoolean(readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")); - Assertions.assertTrue(useMockKafkaConsumer); - LOGGER.debug("useMockKafkaConsumer: {}", useMockKafkaConsumer); - }); - } - - @Disabled - @Test - public void brokenConfigTest() { - // Set system properties to use the broken configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/broken.application.properties"); - // Test if the broken configuration throws the expected exception. - Exception e = Assertions.assertThrows(Exception.class, () -> { - Config config = new Config(); - }); - Assertions.assertEquals("hdfsuri not set", e.getMessage()); - } - - @Disabled - @Test - public void configEqualityTest() { - assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - Config config1 = new Config(); - Config config2 = new Config(); - Config config3 = new Config("12345"); - Config config4 = new Config("12345"); - Assertions.assertNotEquals(config1, config2); - Assertions.assertNotEquals(config1, config3); - Assertions.assertNotEquals(config3, config4); - }); - } - - @Disabled - @Test - public void configConstructorTest() { - assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - Config config1 = new Config(); - Config config2 = new Config("12345"); - Assertions.assertEquals(config1.getHdfsuri(), "hdfs://localhost:45937/"); - Assertions.assertEquals(config2.getHdfsuri(), "12345"); - }); - } - -} From c1410bec9c3f57561c4c274cd84abf3d9a3d8cd4 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 9 Sep 2024 12:04:51 +0300 Subject: [PATCH 42/77] Improved Configuration tests. Removed duplicate validation of maximumFileSize property value. --- .../ConfigurationValidationImpl.java | 5 -- .../teragrep/cfe_39/ConfigurationTest.java | 89 ++++++++++++++++--- 2 files changed, 78 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java index f83f58c2..e11c93b6 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java @@ -103,11 +103,6 @@ private void validateValues(Properties properties) { "numOfConsumers must be set to >0, got " + properties.getProperty("numOfConsumers") ); } - if (Long.parseLong(properties.getProperty("maximumFileSize")) <= 0) { - throw new IllegalArgumentException( - "maximumFileSize must be set to >0, got " + properties.getProperty("maximumFileSize") - ); - } if (Long.parseLong(properties.getProperty("consumerTimeout")) <= 0) { throw new IllegalArgumentException( "consumerTimeout must be set to >0, got " + properties.getProperty("consumerTimeout") diff --git a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java index 2d5d8121..5b71bb5a 100644 --- a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java +++ 
b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java @@ -48,32 +48,99 @@ import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; public class ConfigurationTest { + private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationTest.class); + @Test - public void configurationTest() { + public void kafkaPropertiesConfigurationTest() { assertDoesNotThrow(() -> { + // Set system properties to use the valid configuration. System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); ConfigurationImpl configuration = new ConfigurationImpl().loadPropertiesFile(); - String s = configuration.valueOf("hdfsuri"); - Assertions.assertEquals("hdfs://localhost:45937/", s); - configuration = configuration.with("hdfsuri", "123456"); - s = configuration.valueOf("hdfsuri"); - Assertions.assertEquals("123456", s); + Properties readerKafkaProperties = configuration.toKafkaConsumerProperties(); + // Test extracting useMockKafkaConsumer value from config. + boolean useMockKafkaConsumer = Boolean + .parseBoolean(readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")); + Assertions.assertTrue(useMockKafkaConsumer); + LOGGER.debug("useMockKafkaConsumer: {}", useMockKafkaConsumer); }); } @Test - public void configurationTest2() { - assertDoesNotThrow(() -> { - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + public void brokenConfigurationTest() { + // Set system properties to use the broken configuration. 
+ System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/broken.application.properties"); + Exception e = Assertions.assertThrows(Exception.class, () -> { ConfigurationImpl configuration = new ConfigurationImpl().loadPropertiesFile(); - configuration.toKafkaConsumerProperties(); }); + Assertions.assertEquals("Missing required key hdfsuri", e.getMessage()); + } + + @Test + public void configurationEqualityTest() { + // Set system properties to use the valid configuration. + System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + assertDoesNotThrow(() -> { + ConfigurationImpl configuration1 = new ConfigurationImpl().loadPropertiesFile(); + ConfigurationImpl configuration2 = new ConfigurationImpl().loadPropertiesFile(); + ConfigurationImpl configuration3 = new ConfigurationImpl().loadPropertiesFile(); + ConfigurationImpl configuration4 = new ConfigurationImpl().loadPropertiesFile(); + Assertions.assertNotEquals(configuration1, configuration2); + Assertions.assertNotEquals(configuration1, configuration3); + Assertions.assertNotEquals(configuration3, configuration4); + configuration3 = configuration3.with("hdfsuri", "12345"); + configuration4 = configuration4.with("hdfsuri", "12345"); + Assertions.assertNotEquals(configuration1, configuration3); + Assertions.assertNotEquals(configuration3, configuration4); + }); + } + + @Test + public void configurationWithTest() { + // Set system properties to use the valid configuration. 
+ System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + assertDoesNotThrow(() -> { + ConfigurationImpl configuration1 = new ConfigurationImpl().loadPropertiesFile(); + ConfigurationImpl configuration2 = new ConfigurationImpl().loadPropertiesFile().with("hdfsuri", "12345"); + Assertions.assertEquals(configuration1.valueOf("hdfsuri"), "hdfs://localhost:45937/"); + Assertions.assertEquals(configuration2.valueOf("hdfsuri"), "12345"); + }); + } + + @Test + public void configurationWithFailTest() { + // Set system properties to use the valid configuration. + System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + Exception e = Assertions.assertThrows(IllegalStateException.class, () -> { + ConfigurationImpl configuration = new ConfigurationImpl() + .loadPropertiesFile() + .with("unauthorized_key", "12345"); + }); + Assertions.assertEquals("Unauthorized key unauthorized_key", e.getMessage()); + } + + @Test + public void configurationWithFailTest2() { + // Set system properties to use the valid configuration. + System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + Exception e = Assertions.assertThrows(IllegalArgumentException.class, () -> { + ConfigurationImpl configuration = new ConfigurationImpl().loadPropertiesFile().with("maximumFileSize", "0"); + }); + Assertions.assertEquals("maximumFileSize must be set to >0, got 0", e.getMessage()); } + } From 86d884f2369d5215f2abe046c149013c5f7f9459 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 9 Sep 2024 12:24:48 +0300 Subject: [PATCH 43/77] Removed now unused NullKafkaRecord.java. 
--- .../consumers/kafka/NullKafkaRecord.java | 67 ------------------- 1 file changed, 67 deletions(-) delete mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java deleted file mode 100644 index 20b644ae..00000000 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/NullKafkaRecord.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. 
- * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.cfe_39.consumers.kafka; - -import com.teragrep.cfe_39.avro.SyslogRecord; - -// Null object design pattern, used to create null offset objects. -public final class NullKafkaRecord implements KafkaRecord { - - @Override - public long size() { - return 0; - } - - @Override - public String offsetToJSON() { - return "{\"topic\":\"Not available\", \"partition\":0, \"offset\":0}"; - } - - @Override - public SyslogRecord toSyslogRecord() { - return SyslogRecord.newBuilder().build(); - } -} From a928f4d3941114b85f97cfa0bd08a886de54aab6 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 9 Sep 2024 12:56:10 +0300 Subject: [PATCH 44/77] Removed unneeded sleep() calls from tests, added comments for calls that are needed. 
--- .../com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java | 1 - src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java | 1 - .../java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java | 1 - .../java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java | 1 - .../java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java | 1 - src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java | 6 ++---- 6 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java index f9e81b47..82fcbfb8 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java @@ -119,7 +119,6 @@ public void ingestion0FilesLowSizeTest() { Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); - Thread.sleep(10000); hdfsDataIngestion.run(); }); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index 73ac5541..c80213ef 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -117,7 +117,6 @@ public void ingestion0FilesTest() { Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. 
Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); - Thread.sleep(10000); hdfsDataIngestion.run(); }); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index e2580697..35135b9c 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -165,7 +165,6 @@ public void ingestion1Old1NewFileTest() { Assertions .assertTrue((System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset"))) > 157784760000L); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); - Thread.sleep(10000); hdfsDataIngestion.run(); // Assert that the kafka records were ingested and pruned correctly and the database holds only the expected 1 file. diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index f2dcb284..a313409a 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -158,7 +158,6 @@ public void ingestion2NewFilesTest() { .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); - Thread.sleep(10000); hdfsDataIngestion.run(); // Assert that the kafka records were ingested correctly and the database holds the expected 2 files. 
diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index b56baebd..579e883e 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -165,7 +165,6 @@ public void ingestion2OldFilesTest() { Assertions .assertTrue((System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset"))) > 157784760000L); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); - Thread.sleep(10000); hdfsDataIngestion.run(); // Assert that the kafka records were ingested and pruned correctly and the database doesn't hold any files. diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index e756cc63..26764366 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -137,8 +137,6 @@ public void readCoordinatorTest2Threads() { Thread readThread = new Thread(null, readCoordinator, "testConsumerTopic1"); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. readThread.start(); // Starts the thread, in other words proceeds to call run() function of ReadCoordinator. - Thread.sleep(1000); - ReadCoordinator readCoordinator2 = new ReadCoordinator( "testConsumerTopic", config, @@ -148,7 +146,7 @@ public void readCoordinatorTest2Threads() { Thread readThread2 = new Thread(null, readCoordinator2, "testConsumerTopic2"); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. readThread2.start(); // Starts the thread, in other words proceeds to call run() function of ReadCoordinator. - Thread.sleep(10000); + Thread.sleep(10000); // Allow read threads to have enough time to execute their tasks properly. 
// Because BatchDistributionImpl can not be used as a functional interface, must do assertion through avro-files until better solution is found (add fake to interface?). @@ -200,7 +198,7 @@ public void readCoordinatorTest1Thread() { Thread readThread = new Thread(null, readCoordinator, "testConsumerTopic0"); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. readThread.start(); // Starts the thread, in other words proceeds to call run() function of ReadCoordinator. - Thread.sleep(10000); + Thread.sleep(10000); // Allow read thread to have enough time to execute the task properly. // Because BatchDistributionImpl can not be used as a functional interface, must do assertion through avro-files until better solution is found (add fake to interface?). From b1597edae28325d0110584765eb757f86c264e0d Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 9 Sep 2024 14:08:15 +0300 Subject: [PATCH 45/77] Removed unnecessary SyslogRecord initializations. 
--- .../teragrep/cfe_39/BatchDistributionTest.java | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 52b1fed6..8bf7754e 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -352,11 +352,10 @@ record = new ConsumerRecord<>( inputStream, new SpecificDatumReader<>(SyslogRecord.class) ); - SyslogRecord syslogRecord = null; LOGGER.info("\nReading records from file {}:", hdfsreadpath); Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); + SyslogRecord syslogRecord = reader.next(); Assertions.assertEquals(0, syslogRecord.getOffset()); Assertions.assertTrue(reader.hasNext()); syslogRecord = reader.next(syslogRecord); @@ -404,11 +403,10 @@ record = new ConsumerRecord<>( inputStream2, new SpecificDatumReader<>(SyslogRecord.class) ); - SyslogRecord syslogRecord2 = null; LOGGER.info("\nReading records from file {}:", hdfsreadpath); Assertions.assertTrue(reader2.hasNext()); - syslogRecord2 = reader2.next(syslogRecord2); + SyslogRecord syslogRecord2 = reader2.next(); Assertions.assertEquals(11, syslogRecord2.getOffset()); Assertions.assertTrue(reader2.hasNext()); syslogRecord2 = reader2.next(syslogRecord2); @@ -503,11 +501,10 @@ public void skipNonRFC5424DatabaseOutputTest() { inputStream, new SpecificDatumReader<>(SyslogRecord.class) ); - SyslogRecord syslogRecord = null; LOGGER.info("\nReading records from file {}:", hdfsreadpath); Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); + SyslogRecord syslogRecord = reader.next(); Assertions .assertEquals( "{\"timestamp\": 1650872090807000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"0\", \"offset\": 
3, \"origin\": \"jla-02.default\", \"payload\": \"[ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!\"}", @@ -585,11 +582,10 @@ public void skipNullRFC5424DatabaseOutputTest() { inputStream, new SpecificDatumReader<>(SyslogRecord.class) ); - SyslogRecord syslogRecord = null; LOGGER.info("\nReading records from file {}:", hdfsreadpath); Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); + SyslogRecord syslogRecord = reader.next(); Assertions .assertEquals( "{\"timestamp\": 1650872090807000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"0\", \"offset\": 2, \"origin\": \"jla-02.default\", \"payload\": \"[ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!\"}", @@ -671,11 +667,10 @@ record = new ConsumerRecord<>( inputStream, new SpecificDatumReader<>(SyslogRecord.class) ); - SyslogRecord syslogRecord = null; LOGGER.info("\nReading records from file {}:", hdfsreadpath); Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); + SyslogRecord syslogRecord = reader.next(); Assertions .assertEquals( "{\"timestamp\": 1650872090807000, \"directory\": \"jla02logger\", \"stream\": \"test:jla02logger:0\", \"host\": \"jla-02.default\", \"input\": \"imrelp:cfe-06-0.cfe-06.default:\", \"partition\": \"0\", \"offset\": 3, \"origin\": \"jla-02.default\", \"payload\": \"[ERROR] 2022-04-25 07:34:50,806 com.teragrep.jla_02.Log4j Log - Log4j error says hi!\"}", From b0eb72b1b8609cf588a3f0f519765b3a06a365d7 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 9 Sep 2024 14:34:30 +0300 Subject: [PATCH 46/77] Implemented for loops for normalRecordsTest assertions. 
--- .../cfe_39/BatchDistributionTest.java | 52 ++++--------------- 1 file changed, 10 insertions(+), 42 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 8bf7754e..1df64890 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -354,39 +354,11 @@ record = new ConsumerRecord<>( ); LOGGER.info("\nReading records from file {}:", hdfsreadpath); - Assertions.assertTrue(reader.hasNext()); - SyslogRecord syslogRecord = reader.next(); - Assertions.assertEquals(0, syslogRecord.getOffset()); - Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); - Assertions.assertEquals(1, syslogRecord.getOffset()); - Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); - Assertions.assertEquals(2, syslogRecord.getOffset()); - Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); - Assertions.assertEquals(3, syslogRecord.getOffset()); - Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); - Assertions.assertEquals(4, syslogRecord.getOffset()); - Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); - Assertions.assertEquals(5, syslogRecord.getOffset()); - Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); - Assertions.assertEquals(6, syslogRecord.getOffset()); - Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); - Assertions.assertEquals(7, syslogRecord.getOffset()); - Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); - Assertions.assertEquals(8, syslogRecord.getOffset()); - Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); - Assertions.assertEquals(9, syslogRecord.getOffset()); - 
Assertions.assertTrue(reader.hasNext()); - syslogRecord = reader.next(syslogRecord); - Assertions.assertEquals(10, syslogRecord.getOffset()); + for (int i = 0; i <= 10; i++) { + Assertions.assertTrue(reader.hasNext()); + SyslogRecord syslogRecord = reader.next(); + Assertions.assertEquals(i, syslogRecord.getOffset()); + } Assertions.assertFalse(reader.hasNext()); // Use empty batch to flush the local files to HDFS. @@ -405,15 +377,11 @@ record = new ConsumerRecord<>( ); LOGGER.info("\nReading records from file {}:", hdfsreadpath); - Assertions.assertTrue(reader2.hasNext()); - SyslogRecord syslogRecord2 = reader2.next(); - Assertions.assertEquals(11, syslogRecord2.getOffset()); - Assertions.assertTrue(reader2.hasNext()); - syslogRecord2 = reader2.next(syslogRecord2); - Assertions.assertEquals(12, syslogRecord2.getOffset()); - Assertions.assertTrue(reader2.hasNext()); - syslogRecord2 = reader2.next(syslogRecord2); - Assertions.assertEquals(13, syslogRecord2.getOffset()); + for (int i = 11; i <= 13; i++) { + Assertions.assertTrue(reader2.hasNext()); + SyslogRecord syslogRecord2 = reader2.next(); + Assertions.assertEquals(i, syslogRecord2.getOffset()); + } Assertions.assertFalse(reader2.hasNext()); }); } From 887da33cafa7a419a13c20b3b8c42d95c7c42010 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 10 Sep 2024 09:29:40 +0300 Subject: [PATCH 47/77] Set all classes to be final wherever possible. Made HdfsDataIngestion and ReadCoordinator immutable. 
--- src/main/java/com/teragrep/cfe_39/Main.java | 2 +- .../kafka/ConsumerRebalanceListenerImpl.java | 2 +- .../teragrep/cfe_39/consumers/kafka/HDFSPrune.java | 2 +- .../teragrep/cfe_39/consumers/kafka/HDFSWrite.java | 2 +- .../cfe_39/consumers/kafka/HdfsDataIngestion.java | 12 ++++++------ .../cfe_39/consumers/kafka/KafkaAsSyslogRecord.java | 2 +- .../consumers/kafka/MockKafkaConsumerFactory.java | 2 +- .../cfe_39/consumers/kafka/PartitionFileImpl.java | 2 +- .../cfe_39/consumers/kafka/ReadCoordinator.java | 7 +++---- .../cfe_39/consumers/kafka/SyslogAvroWriter.java | 2 +- 10 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/Main.java b/src/main/java/com/teragrep/cfe_39/Main.java index bb7dfa64..c6f3aa31 100644 --- a/src/main/java/com/teragrep/cfe_39/Main.java +++ b/src/main/java/com/teragrep/cfe_39/Main.java @@ -52,7 +52,7 @@ import java.io.IOException; -public class Main { +public final class Main { private static final Logger LOGGER = LoggerFactory.getLogger(Main.class); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java index 286ba329..6bbaca27 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java @@ -62,7 +62,7 @@ import java.util.HashMap; import java.util.Map; -public class ConsumerRebalanceListenerImpl implements ConsumerRebalanceListener { +public final class ConsumerRebalanceListenerImpl implements ConsumerRebalanceListener { private final Logger LOGGER = LoggerFactory.getLogger(ConsumerRebalanceListenerImpl.class); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java index 7f5a8913..1e7a97db 100644 --- 
a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java @@ -54,7 +54,7 @@ import java.io.IOException; -public class HDFSPrune { +public final class HDFSPrune { private static final Logger LOGGER = LoggerFactory.getLogger(HDFSPrune.class); private final FileSystem fs; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java index 7dbd1d7e..500b37a6 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java @@ -57,7 +57,7 @@ import java.net.URI; import java.util.Properties; -public class HDFSWrite implements AutoCloseable { +public final class HDFSWrite implements AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(HDFSWrite.class); private final String fileName; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index 855c5975..f7090ce0 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -70,21 +70,19 @@ import java.util.regex.Pattern; // Ingests data for HDFS database, periodically scans kafka for new topics based on config.getQueueTopicPattern() and creates kafka topic consumer groups for the new topics that will store the records to HDFS. 
-public class HdfsDataIngestion { +public final class HdfsDataIngestion { private static final Logger LOGGER = LoggerFactory.getLogger(HdfsDataIngestion.class); private final ConfigurationImpl config; private final org.apache.kafka.clients.consumer.Consumer kafkaConsumer; private final List threads = new ArrayList<>(); private final Set activeTopics = new HashSet<>(); - private boolean keepRunning; - private boolean useMockKafkaConsumer; + private final boolean useMockKafkaConsumer; private final int numOfConsumers; - private Map hdfsStartOffsets; + private final Map hdfsStartOffsets; private final FileSystem fs; public HdfsDataIngestion(ConfigurationImpl config) throws IOException { - keepRunning = true; this.config = config; Properties readerKafkaProperties = config.toKafkaConsumerProperties(); this.numOfConsumers = Integer.parseInt(config.valueOf("numOfConsumers")); @@ -168,13 +166,15 @@ public void run() throws InterruptedException, IOException { // Generates offsets of the already committed records for Kafka and passes them to the kafka consumers. 
try (HDFSRead hr = new HDFSRead(config, fs)) { - hdfsStartOffsets = hr.hdfsStartOffsets(); + hdfsStartOffsets.clear(); + hdfsStartOffsets.putAll(hr.hdfsStartOffsets()); LOGGER.debug("topicPartitionStartMap generated succesfully: <{}>", hdfsStartOffsets); } catch (IOException e) { throw new RuntimeException(e); } + boolean keepRunning = true; while (keepRunning) { if ("kerberos".equals(config.valueOf("hadoop.security.authentication"))) { UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java index bb45e7b1..4a29c019 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaAsSyslogRecord.java @@ -56,7 +56,7 @@ import java.time.Instant; import java.time.ZonedDateTime; -public class KafkaAsSyslogRecord { +public final class KafkaAsSyslogRecord { private final SDVector eventNodeSourceSource; private final SDVector eventNodeRelaySource; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java index 6b4c81bb..5cc24060 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java @@ -65,7 +65,7 @@ * @author Mikko Kortelainen */ @VisibleForTesting -public class MockKafkaConsumerFactory { +public final class MockKafkaConsumerFactory { final static private Logger LOGGER = LoggerFactory.getLogger(MockKafkaConsumerFactory.class); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 49b18dcf..de233ff8 100644 --- 
a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -57,7 +57,7 @@ import java.util.ArrayList; import java.util.List; -public class PartitionFileImpl implements PartitionFile { +public final class PartitionFileImpl implements PartitionFile { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionFileImpl.class); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java index b143ba67..1b0ff972 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java @@ -54,14 +54,13 @@ import java.util.*; -public class ReadCoordinator implements Runnable { +public final class ReadCoordinator implements Runnable { private static final Logger LOGGER = LoggerFactory.getLogger(ReadCoordinator.class); private final String queueTopic; - ConfigurationImpl config; + private final ConfigurationImpl config; private final BatchDistributionImpl callbackFunction; - private boolean run = true; private final Map hdfsStartOffsets; public ReadCoordinator( @@ -149,7 +148,7 @@ public void run() { config.toKafkaConsumerProperties(), queueTopic, callbackFunction, useMockKafkaConsumer ) ) { - while (run) { + while (true) { kafkaReader.read(); } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java index 6da05f6d..6ad9d5ae 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/SyslogAvroWriter.java @@ -57,7 +57,7 @@ import java.io.*; -public class SyslogAvroWriter implements AutoCloseable { +public final class SyslogAvroWriter implements AutoCloseable { private static final Logger LOGGER = 
LoggerFactory.getLogger(SyslogAvroWriter.class); From 47aba9527668c02bedd6b1ffba7a8f1520894002 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 10 Sep 2024 12:11:41 +0300 Subject: [PATCH 48/77] Implemented FileSystemFactory interface and class. Moved FileSystem object initialization code away from constructors. --- .../kafka/ConsumerRebalanceListenerImpl.java | 71 +--------- .../consumers/kafka/FileSystemFactory.java | 56 ++++++++ .../kafka/FileSystemFactoryImpl.java | 132 ++++++++++++++++++ .../cfe_39/consumers/kafka/HDFSWrite.java | 97 +------------ .../consumers/kafka/HdfsDataIngestion.java | 63 +-------- 5 files changed, 206 insertions(+), 213 deletions(-) create mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactory.java create mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java index 6bbaca27..4108091e 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java @@ -47,9 +47,6 @@ import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.kafka.clients.consumer.Consumer; import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; import org.apache.kafka.common.TopicPartition; @@ -57,7 +54,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.net.URI; import java.util.Collection; import java.util.HashMap; import java.util.Map; @@ -89,70 +85,17 @@ public void onPartitionsRevoked(Collection partitions) { @Override public void 
onPartitionsAssigned(Collection partitions) { - LOGGER.info("onPartitionsAssigned triggered"); // Generates offsets of the already committed records for Kafka and passes them to the kafka consumers. + LOGGER.info("onPartitionsAssigned triggered"); + // Initialize FileSystem + FileSystemFactoryImpl fileSystemFactoryImpl = new FileSystemFactoryImpl(config); FileSystem fs; - if (!"kerberos".equals(config.valueOf("hadoop.security.authentication"))) { - // Initializing the FileSystem with minicluster. - String hdfsuri = config.valueOf("hdfsuri"); - // ====== Init HDFS File System Object - HdfsConfiguration conf = new HdfsConfiguration(); - // Set FileSystem URI - conf.set("fs.defaultFS", hdfsuri); - // Because of Maven - conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); - conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - // Set HADOOP user - System.setProperty("HADOOP_USER_NAME", "hdfs"); - System.setProperty("hadoop.home.dir", "/"); - //Get the filesystem - HDFS - try { - fs = FileSystem.get(URI.create(hdfsuri), conf); - } - catch (IOException e) { - throw new RuntimeException(e); - } + try { + fs = fileSystemFactoryImpl.create(false); } - else { - // Initializing the FileSystem with kerberos. - String hdfsuri = config.valueOf("hdfsuri"); // Get from config. 
- // set kerberos host and realm - System.setProperty("java.security.krb5.realm", config.valueOf("java.security.krb5.realm")); - System.setProperty("java.security.krb5.kdc", config.valueOf("java.security.krb5.kdc")); - HdfsConfiguration conf = new HdfsConfiguration(); - // enable kerberus - conf.set("hadoop.security.authentication", config.valueOf("hadoop.security.authentication")); - conf.set("hadoop.security.authorization", config.valueOf("hadoop.security.authorization")); - conf.set("hadoop.kerberos.keytab.login.autorenewal.enabled", config.valueOf("kerberosLoginAutorenewal")); - conf.set("fs.defaultFS", hdfsuri); // Set FileSystem URI - conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName()); // Maven stuff? - conf.set("fs.file.impl", LocalFileSystem.class.getName()); // Maven stuff? - /* hack for running locally with fake DNS records - set this to true if overriding the host name in /etc/hosts*/ - conf.set("dfs.client.use.datanode.hostname", config.valueOf("dfs.client.use.datanode.hostname")); - /* server principal - the kerberos principle that the namenode is using*/ - conf - .set( - "dfs.namenode.kerberos.principal.pattern", - config.valueOf("dfs.namenode.kerberos.principal.pattern") - ); - // set sasl - conf.set("dfs.data.transfer.protection", config.valueOf("dfs.data.transfer.protection")); - conf - .set( - "dfs.encrypt.data.transfer.cipher.suites", - config.valueOf("dfs.encrypt.data.transfer.cipher.suites") - ); - // filesystem for HDFS access is set here - try { - fs = FileSystem.get(conf); - } - catch (IOException e) { - throw new RuntimeException(e); - } + catch (IOException e) { + throw new RuntimeException(e); } - Map hdfsStartOffsets = new HashMap<>(); try (HDFSRead hr = new HDFSRead(config, fs)) { hdfsStartOffsets = hr.hdfsStartOffsets(); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactory.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactory.java new file mode 100644 index 
00000000..0cbf1acf --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactory.java @@ -0,0 +1,56 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. 
+ * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.consumers.kafka; + +import org.apache.hadoop.fs.FileSystem; + +import java.io.IOException; + +public interface FileSystemFactory { + + FileSystem create(boolean initializeUGI) throws IOException; + +} diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java new file mode 100644 index 00000000..65ededaf --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java @@ -0,0 +1,132 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. 
+ * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.consumers.kafka; + +import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.security.UserGroupInformation; + +import java.io.IOException; +import java.net.URI; + +public final class FileSystemFactoryImpl implements FileSystemFactory { + + private final HdfsConfiguration conf; + private final ConfigurationImpl configuration; + + public FileSystemFactoryImpl(ConfigurationImpl configuration) { + this.conf = new HdfsConfiguration(); + this.configuration = configuration; + } + + public FileSystem create(boolean initializeUGI) throws IOException { + FileSystem fs; + if ("kerberos".equals(configuration.valueOf("hadoop.security.authentication"))) { + // Initializing the FileSystem with kerberos. + String hdfsuri = configuration.valueOf("hdfsuri"); // Get from config. 
+ // set kerberos host and realm + System.setProperty("java.security.krb5.realm", configuration.valueOf("java.security.krb5.realm")); + System.setProperty("java.security.krb5.kdc", configuration.valueOf("java.security.krb5.kdc")); + conf.clear(); + // enable kerberus + conf.set("hadoop.security.authentication", configuration.valueOf("hadoop.security.authentication")); + conf.set("hadoop.security.authorization", configuration.valueOf("hadoop.security.authorization")); + conf + .set( + "hadoop.kerberos.keytab.login.autorenewal.enabled", + configuration.valueOf("hadoop.kerberos.keytab.login.autorenewal.enabled") + ); + conf.set("fs.defaultFS", hdfsuri); // Set FileSystem URI + conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName()); // Maven stuff? + conf.set("fs.file.impl", LocalFileSystem.class.getName()); // Maven stuff? + /* hack for running locally with fake DNS records + set this to true if overriding the host name in /etc/hosts*/ + conf.set("dfs.client.use.datanode.hostname", configuration.valueOf("dfs.client.use.datanode.hostname")); + /* server principal + the kerberos principle that the namenode is using*/ + conf + .set( + "dfs.namenode.kerberos.principal.pattern", + configuration.valueOf("dfs.namenode.kerberos.principal.pattern") + ); + // set sasl + conf.set("dfs.data.transfer.protection", configuration.valueOf("dfs.data.transfer.protection")); + conf + .set( + "dfs.encrypt.data.transfer.cipher.suites", + configuration.valueOf("dfs.encrypt.data.transfer.cipher.suites") + ); + if (initializeUGI) { + UserGroupInformation.setConfiguration(conf); + UserGroupInformation + .loginUserFromKeytab(configuration.valueOf("KerberosKeytabUser"), configuration.valueOf("KerberosKeytabPath")); + } + // filesystem for HDFS access is set here + fs = FileSystem.get(conf); + } + else { + // Initializing the FileSystem with minicluster. 
+ String hdfsuri = configuration.valueOf("hdfsuri"); + // ====== Init HDFS File System Object + conf.clear(); + // Set FileSystem URI + conf.set("fs.defaultFS", hdfsuri); + // Because of Maven + conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); + conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); + // Set HADOOP user + System.setProperty("HADOOP_USER_NAME", "hdfs"); + System.setProperty("hadoop.home.dir", "/"); + //Get the filesystem - HDFS + fs = FileSystem.get(URI.create(hdfsuri), conf); + } + return fs; + } + +} diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java index 500b37a6..5fa1835c 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java @@ -47,113 +47,30 @@ import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.apache.hadoop.fs.*; -import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.hdfs.HdfsConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; -import java.net.URI; -import java.util.Properties; public final class HDFSWrite implements AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(HDFSWrite.class); private final String fileName; private final String path; - private final FileSystem fs; - private final boolean useMockKafkaConsumer; // Defines if mock HDFS database is used for testing - private final HdfsConfiguration conf; - private final String hdfsuri; + private final ConfigurationImpl configuration; - public HDFSWrite(ConfigurationImpl config, String topic, String partition, long offset) throws IOException { - - Properties readerKafkaProperties = config.toKafkaConsumerProperties(); - this.useMockKafkaConsumer = Boolean - 
.parseBoolean(readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")); - - if (useMockKafkaConsumer) { - // Code for initializing the class for mock hdfs database usage without kerberos. - hdfsuri = config.valueOf("hdfsuri"); - - /* The filepath should be something like hdfs:///opt/teragrep/cfe_39/srv/topic_name/0.12345 where 12345 is offset and 0 the partition. - In other words the directory named topic_name holds files that are named and arranged based on partition and the partition's offset. Every partition has its own set of unique offset values. - These values should be fetched from config and other input parameters (topic+partition+offset).*/ - path = config.valueOf("hdfsPath") + "/" + topic; - fileName = partition + "." + offset; // filename should be constructed from partition and offset. - - // ====== Init HDFS File System Object - conf = new HdfsConfiguration(); - // Set FileSystem URI - conf.set("fs.defaultFS", hdfsuri); - // Because of Maven - conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName()); - conf.set("fs.file.impl", LocalFileSystem.class.getName()); - // Set HADOOP user here. - System.setProperty("HADOOP_USER_NAME", "hdfs"); - System.setProperty("hadoop.home.dir", "/"); - // filesystem for HDFS access is set here - try { - fs = FileSystem.get(URI.create(hdfsuri), conf); - } - catch (IOException e) { - throw new RuntimeException(e); - } - - } - else { - // Code for initializing the class for kerberized HDFS database usage. - hdfsuri = config.valueOf("hdfsuri"); - - path = config.valueOf("hdfsPath") + "/" + topic; - fileName = partition + "." 
+ offset; - - // set kerberos host and realm - System.setProperty("java.security.krb5.realm", config.valueOf("java.security.krb5.realm")); - System.setProperty("java.security.krb5.kdc", config.valueOf("java.security.krb5.kdc")); - - conf = new HdfsConfiguration(); - - // enable kerberus - conf.set("hadoop.security.authentication", config.valueOf("hadoop.security.authentication")); - conf.set("hadoop.security.authorization", config.valueOf("hadoop.security.authorization")); - conf - .set( - "hadoop.kerberos.keytab.login.autorenewal.enabled", - config.valueOf("hadoop.kerberos.keytab.login.autorenewal.enabled") - ); - - conf.set("fs.defaultFS", hdfsuri); // Set FileSystem URI - conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName()); // Maven stuff? - conf.set("fs.file.impl", LocalFileSystem.class.getName()); // Maven stuff? - - // hack for running locally with fake DNS records, set this to true if overriding the host name in /etc/hosts - conf.set("dfs.client.use.datanode.hostname", config.valueOf("dfs.client.use.datanode.hostname")); - - // server principal, the kerberos principle that the namenode is using - conf - .set( - "dfs.namenode.kerberos.principal.pattern", - config.valueOf("dfs.namenode.kerberos.principal.pattern") - ); - - // set sasl - conf.set("dfs.data.transfer.protection", config.valueOf("dfs.data.transfer.protection")); - conf - .set( - "dfs.encrypt.data.transfer.cipher.suites", - config.valueOf("dfs.encrypt.data.transfer.cipher.suites") - ); - - // filesystem for HDFS access is set here - fs = FileSystem.get(conf); - } + public HDFSWrite(ConfigurationImpl config, String topic, String partition, long offset) { + this.configuration = config; + path = config.valueOf("hdfsPath") + "/" + topic; + fileName = partition + "." + offset; // filename should be constructed from partition and offset. 
} // Method for committing the AVRO-file to HDFS public void commit(File syslogFile) throws IOException { // The code for writing the file to HDFS should be same for both test (non-kerberized access) and prod (kerberized access). + FileSystemFactoryImpl fileSystemFactoryImpl = new FileSystemFactoryImpl(configuration); + FileSystem fs = fileSystemFactoryImpl.create(false); //==== Create directory if not exists Path workingDir = fs.getWorkingDirectory(); // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index f7090ce0..dc829cf7 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -49,9 +49,6 @@ import com.teragrep.cfe_39.metrics.*; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.common.PartitionInfo; @@ -61,7 +58,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.net.URI; import java.sql.SQLException; import java.time.Duration; import java.util.*; @@ -80,7 +76,6 @@ public final class HdfsDataIngestion { private final boolean useMockKafkaConsumer; private final int numOfConsumers; private final Map hdfsStartOffsets; - private final FileSystem fs; public HdfsDataIngestion(ConfigurationImpl config) throws IOException { this.config = config; @@ -90,20 +85,6 @@ public HdfsDataIngestion(ConfigurationImpl config) throws IOException { 
.parseBoolean(readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")); if (useMockKafkaConsumer) { this.kafkaConsumer = MockKafkaConsumerFactory.getConsumer(0); // A consumer used only for scanning the available topics to be allocated to consumers running in different threads (thus 0 as input parameter). - // Initializing the FileSystem with minicluster. - String hdfsuri = config.valueOf("hdfsuri"); - // ====== Init HDFS File System Object - HdfsConfiguration conf = new HdfsConfiguration(); - // Set FileSystem URI - conf.set("fs.defaultFS", hdfsuri); - // Because of Maven - conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); - conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - // Set HADOOP user - System.setProperty("HADOOP_USER_NAME", "hdfs"); - System.setProperty("hadoop.home.dir", "/"); - //Get the filesystem - HDFS - fs = FileSystem.get(URI.create(hdfsuri), conf); } else { this.kafkaConsumer = new KafkaConsumer<>( @@ -111,46 +92,6 @@ public HdfsDataIngestion(ConfigurationImpl config) throws IOException { new ByteArrayDeserializer(), new ByteArrayDeserializer() ); - // Initializing the FileSystem with kerberos. - String hdfsuri = config.valueOf("hdfsuri"); // Get from config. 
- // set kerberos host and realm - System.setProperty("java.security.krb5.realm", config.valueOf("java.security.krb5.realm")); - System.setProperty("java.security.krb5.kdc", config.valueOf("java.security.krb5.kdc")); - HdfsConfiguration conf = new HdfsConfiguration(); - // enable kerberus - conf.set("hadoop.security.authentication", config.valueOf("hadoop.security.authentication")); - conf.set("hadoop.security.authorization", config.valueOf("hadoop.security.authorization")); - conf - .set( - "hadoop.kerberos.keytab.login.autorenewal.enabled", - config.valueOf("hadoop.kerberos.keytab.login.autorenewal.enabled") - ); - conf.set("fs.defaultFS", hdfsuri); // Set FileSystem URI - conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName()); // Maven stuff? - conf.set("fs.file.impl", LocalFileSystem.class.getName()); // Maven stuff? - /* hack for running locally with fake DNS records - set this to true if overriding the host name in /etc/hosts*/ - conf.set("dfs.client.use.datanode.hostname", config.valueOf("dfs.client.use.datanode.hostname")); - /* server principal - the kerberos principle that the namenode is using*/ - conf - .set( - "dfs.namenode.kerberos.principal.pattern", - config.valueOf("dfs.namenode.kerberos.principal.pattern") - ); - // set sasl - conf.set("dfs.data.transfer.protection", config.valueOf("dfs.data.transfer.protection")); - conf - .set( - "dfs.encrypt.data.transfer.cipher.suites", - config.valueOf("dfs.encrypt.data.transfer.cipher.suites") - ); - // set usergroup stuff - UserGroupInformation.setConfiguration(conf); - UserGroupInformation - .loginUserFromKeytab(config.valueOf("KerberosKeytabUser"), config.valueOf("KerberosKeytabPath")); - // filesystem for HDFS access is set here - fs = FileSystem.get(conf); } hdfsStartOffsets = new HashMap<>(); } @@ -164,6 +105,10 @@ public void run() throws InterruptedException, IOException { // register per topic counting List topicCounters = new CopyOnWriteArrayList<>(); + // Initialize FileSystem + 
FileSystemFactoryImpl fileSystemFactoryImpl = new FileSystemFactoryImpl(config); + FileSystem fs = fileSystemFactoryImpl.create(true); + // Generates offsets of the already committed records for Kafka and passes them to the kafka consumers. try (HDFSRead hr = new HDFSRead(config, fs)) { hdfsStartOffsets.clear(); From da226e27787f2ff3dbbb9ed7f7ad5d330b8fa4b5 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 11 Sep 2024 08:49:19 +0300 Subject: [PATCH 49/77] Made BatchDistributionImpl, KafkaReader and PartitionRecordsImpl final. --- .../teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java | 2 +- .../java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java | 2 +- .../teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index 4f796fc5..343f9c94 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -60,7 +60,7 @@ The target where the record is stored in HDFS is based on the topic, partition and offset. ie. 
topic_name/0.123456 where offset is 123456 The mock consumer is activated for testing using the configuration file: readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")*/ -public class BatchDistributionImpl implements BatchDistribution { +public final class BatchDistributionImpl implements BatchDistribution { private static final Logger LOGGER = LoggerFactory.getLogger(BatchDistributionImpl.class); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java index f5ca21b8..d3c1c444 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java @@ -55,7 +55,7 @@ import java.time.Instant; import java.util.*; -public class KafkaReader implements AutoCloseable { +public final class KafkaReader implements AutoCloseable { private final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java index e0b2b2c0..dd55745a 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java @@ -54,7 +54,7 @@ import java.util.ArrayList; import java.util.List; -public class PartitionRecordsImpl implements PartitionRecords { +public final class PartitionRecordsImpl implements PartitionRecords { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionRecordsImpl.class); From dc6365d08b3960bd95af2f72304ab699fd4dc619 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 23 Sep 2024 11:11:36 +0300 Subject: [PATCH 50/77] Changed java version used by maven to java-1.8.0-openjdk, which altered the expected thrown error in failNullRFC5424DatabaseOutputTest(). 
--- .../java/com/teragrep/cfe_39/ProcessingFailureTest.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 0f33e7fe..df84c7da 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -207,11 +207,7 @@ public void failNullRFC5424DatabaseOutputTest() { recordOffsetObjectList.add(recordOffsetObject); RuntimeException e = Assertions .assertThrows(RuntimeException.class, () -> output.accept(recordOffsetObjectList)); - Assertions - .assertEquals( - "java.lang.NullPointerException: Cannot read the array length because \"buf\" is null", - e.getMessage() - ); + Assertions.assertEquals("java.lang.NullPointerException", e.getMessage()); Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.1"))); // No files stored to hdfs. From 62ba79e2c2f5e8fdc8a2c3c02bdac68583da5c9f Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 26 Sep 2024 16:14:32 +0300 Subject: [PATCH 51/77] Implemented KafkaConfiguration.java, HdfsConfiguration.java, KafkaConfigurationValidation.java and HdfsConfigurationValidation.java. 
--- .../configuration/HdfsConfiguration.java | 103 ++++++++++++++++ .../HdfsConfigurationValidation.java | 114 ++++++++++++++++++ .../configuration/KafkaConfiguration.java | 103 ++++++++++++++++ .../KafkaConfigurationValidation.java | 112 +++++++++++++++++ 4 files changed, 432 insertions(+) create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationValidation.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationValidation.java diff --git a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java new file mode 100644 index 00000000..942df511 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java @@ -0,0 +1,103 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39.configuration; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Properties; + +public class HdfsConfiguration implements Configuration { + + private static final Logger LOGGER = LoggerFactory.getLogger(HdfsConfiguration.class); + private final Properties properties; + private final HdfsConfigurationValidation configurationValidation; + + public HdfsConfiguration() { + this.properties = new Properties(); + this.configurationValidation = new HdfsConfigurationValidation(); + } + + @Override + public void loadPropertiesFile(String configurationFile) throws IOException { + Path configPath = Paths.get(configurationFile); + LOGGER.info("Loading hdfs config <[{}]>", configPath.toAbsolutePath()); + try (InputStream inputStream = Files.newInputStream(configPath)) { + properties.load(inputStream); + LOGGER.debug("Got configuration: <{}>", properties); + configurationValidation.validate(properties); + } + } + + @Override + public void with(String key, String value) { + if (this.has(key)) { + properties.setProperty(key, value); + configurationValidation.validate(properties); + } + else { + throw new IllegalArgumentException("Key not found: " + key); + } + } + + @Override + public String valueOf(String key) { + if (this.has(key)) { + return properties.getProperty(key); + } + throw new IllegalArgumentException("Key not found: " + key); + } + + @Override + public boolean has(String key) { + return properties.containsKey(key); + } +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationValidation.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationValidation.java new file mode 100644 index 00000000..70bf5e97 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationValidation.java @@ -0,0 +1,114 @@ +/* + * HDFS 
Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39.configuration; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashSet; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +public class HdfsConfigurationValidation implements ConfigurationValidation { + + private static final Logger LOGGER = LoggerFactory.getLogger(HdfsConfigurationValidation.class); + private final Set<String> requiredKeys; + + public HdfsConfigurationValidation() { + this.requiredKeys = new HashSet<>(); + } + + @Override + public void validate(Properties properties) { + validateKeys(properties); + validateValues(properties); + } + + private void validateKeys(Properties properties) { + if (requiredKeys.isEmpty()) { + loadRequiredKeys(); + } + int requiredCount = 0; + for (Map.Entry<Object, Object> keyValuePair : properties.entrySet()) { + if (requiredKeys.contains(keyValuePair.getKey().toString())) { + requiredCount++; + } + else { + throw new IllegalStateException("Unauthorized key " + keyValuePair.getKey().toString()); + } + } + if (requiredCount < requiredKeys.size()) { + for (String key : requiredKeys) { + if (!properties.containsKey(key)) { + throw new IllegalStateException("Missing required key " + key); + } + } + } + } + + private void validateValues(Properties properties) { + } + + private void loadRequiredKeys() { + // HDFS + requiredKeys.add("hdfsPath"); + requiredKeys.add("hdfsuri"); + requiredKeys.add("dfs.client.use.datanode.hostname"); + requiredKeys.add("dfs.data.transfer.protection"); + requiredKeys.add("dfs.encrypt.data.transfer.cipher.suites"); + // Kerberos + requiredKeys.add("hadoop.security.authentication"); + requiredKeys.add("hadoop.security.authorization"); + requiredKeys.add("dfs.namenode.kerberos.principal.pattern"); + requiredKeys.add("java.security.krb5.kdc"); + requiredKeys.add("java.security.krb5.realm"); + requiredKeys.add("KerberosKeytabUser"); + requiredKeys.add("KerberosKeytabPath"); + 
requiredKeys.add("hadoop.kerberos.keytab.login.autorenewal.enabled"); + } + +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java new file mode 100644 index 00000000..c70114d7 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java @@ -0,0 +1,103 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. 
+ * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Properties; + +public class KafkaConfiguration implements Configuration { + + private static final Logger LOGGER = LoggerFactory.getLogger(KafkaConfiguration.class); + private final Properties properties; + private final KafkaConfigurationValidation configurationValidation; + + public KafkaConfiguration() { + this.properties = new Properties(); + this.configurationValidation = new KafkaConfigurationValidation(); + } + + @Override + public void loadPropertiesFile(String configurationFile) throws IOException { + Path configPath = Paths.get(configurationFile); + LOGGER.info("Loading kafka config <[{}]>", configPath.toAbsolutePath()); + try (InputStream inputStream = Files.newInputStream(configPath)) { + properties.load(inputStream); + LOGGER.debug("Got configuration: <{}>", properties); + configurationValidation.validate(properties); + } + } + + @Override + public void with(String key, String value) { + if (this.has(key)) { + properties.setProperty(key, value); + configurationValidation.validate(properties); + } + else { + throw new IllegalArgumentException("Key not found: " + key); + } + } + + @Override + public String valueOf(String key) { + if (this.has(key)) { + return properties.getProperty(key); + } + throw new IllegalArgumentException("Key not found: " 
+ key); + } + + @Override + public boolean has(String key) { + return properties.containsKey(key); + } +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationValidation.java b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationValidation.java new file mode 100644 index 00000000..5095399d --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationValidation.java @@ -0,0 +1,112 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. 
+ * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashSet; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +public class KafkaConfigurationValidation implements ConfigurationValidation { + + private static final Logger LOGGER = LoggerFactory.getLogger(KafkaConfigurationValidation.class); + private final Set<String> requiredKeys; + + public KafkaConfigurationValidation() { + this.requiredKeys = new HashSet<>(); + } + + @Override + public void validate(Properties properties) { + validateKeys(properties); + validateValues(properties); + } + + private void validateKeys(Properties properties) { + if (requiredKeys.isEmpty()) { + loadRequiredKeys(); + } + int requiredCount = 0; + for (Map.Entry<Object, Object> keyValuePair : properties.entrySet()) { + if (requiredKeys.contains(keyValuePair.getKey().toString())) { + requiredCount++; + } + else { + throw new IllegalStateException("Unauthorized key " + keyValuePair.getKey().toString()); + } + } + if (requiredCount < requiredKeys.size()) { + for (String key : requiredKeys) { + if (!properties.containsKey(key)) { + throw new IllegalStateException("Missing required key " + key); + } + } + } + } + + private void validateValues(Properties properties) { + } + + private void loadRequiredKeys() { + // kafka + requiredKeys.add("java.security.auth.login.config"); + requiredKeys.add("bootstrap.servers"); + requiredKeys.add("auto.offset.reset"); + requiredKeys.add("enable.auto.commit"); + 
requiredKeys.add("group.id"); + requiredKeys.add("security.protocol"); + requiredKeys.add("sasl.mechanism"); + requiredKeys.add("max.poll.records"); + requiredKeys.add("fetch.max.bytes"); + requiredKeys.add("request.timeout.ms"); + requiredKeys.add("max.poll.interval.ms"); + requiredKeys.add("useMockKafkaConsumer"); + } + +} From 25c2def099a5b3fdef6686e86858874cacbdd305 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 26 Sep 2024 16:15:42 +0300 Subject: [PATCH 52/77] Split properties of ingress (kafka) and egress (hdfs) libraries to different files, leaving strictly application configuration to the main application.properties file. --- pom.xml | 4 ++ rpm/resources/application.properties | 48 +++---------------- rpm/resources/egress.properties | 16 +++++++ rpm/resources/ingress.properties | 22 +++++++++ .../resources/broken.application.properties | 44 +++-------------- .../failProcessing.application.properties | 44 ++--------------- .../largeFile.application.properties | 44 ++--------------- .../resources/valid.application.properties | 44 ++--------------- src/test/resources/valid.hdfs.properties | 18 +++++++ src/test/resources/valid.kafka.properties | 22 +++++++++ 10 files changed, 110 insertions(+), 196 deletions(-) create mode 100644 rpm/resources/egress.properties create mode 100644 rpm/resources/ingress.properties create mode 100644 src/test/resources/valid.hdfs.properties create mode 100644 src/test/resources/valid.kafka.properties diff --git a/pom.xml b/pom.xml index 86edd078..9160621e 100644 --- a/pom.xml +++ b/pom.xml @@ -203,6 +203,10 @@ rpm/resources/config.jaas rpm/resources/log4j2.properties rpm/resources/application.properties + rpm/resources/ingress.properties + rpm/resources/egress.properties + src/test/resources/valid.hdfs.properties + src/test/resources/valid.kafka.properties rpm/resources/cfe_39.service rpm/rpm.pom.xml src/main/java/com/teragrep/cfe_39/avro/SyslogRecord.java diff --git a/rpm/resources/application.properties 
b/rpm/resources/application.properties index 5e1ffd99..2d2bce5a 100644 --- a/rpm/resources/application.properties +++ b/rpm/resources/application.properties @@ -1,56 +1,22 @@ -# Kafka security configuration file -java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas # Logger settings log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties +# hdfs settings +egress.configurationFile=/opt/teragrep/cfe_39/etc/egress.properties +# kafka settings +ingress.configurationFile=/opt/teragrep/cfe_39/etc/ingress.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ # Number of consumers created to the consumer groups numOfConsumers=2 -# Kafka bootstrap servers -bootstrap.servers=test -# Offset, should not be touched -auto.offset.reset=earliest -# Autocommit, should not be touched -enable.auto.commit=false -# Consumer group id, this is to track the progress of reading hte topic -group.id=cfe_39 -# Used security protocol and mechanism -security.protocol=SASL_PLAINTEXT -sasl.mechanism=PLAIN -# Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger -max.poll.records=500 -# How much data can be fetched in one go -fetch.max.bytes=1073741820 -# How long for request before timing out. Note that too big max poll records size can cause this to trigger -request.timeout.ms=300000 -max.poll.interval.ms=300000 -# For testing only -useMockKafkaConsumer=false # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # The maximum file size for AVRO-files that are to be stored in HDFS database. -maximumFileSize=60800000 +maximumFileSize=3000 # Boolean for deciding if records not in RFC5424 should be skipped or not. skipNonRFC5424Records=true # Boolean for deciding if empty RFC5424 records should be skipped or not. skipEmptyRFC5424Records=true -# HDFS pruning offset, prunes files older than the given milliseconds. 
-pruneOffset=172800000 -# HDFS uri -hdfsuri=hdfs://localhost:45937/ -# HDFS path -hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ -# Kerberos -java.security.krb5.kdc=test -java.security.krb5.realm=test -hadoop.security.authentication=kerberos -hadoop.security.authorization=test -dfs.namenode.kerberos.principal.pattern=test -KerberosKeytabUser=test -KerberosKeytabPath=test -dfs.client.use.datanode.hostname=false -kerberosLoginAutorenewal=true -dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test +# HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 157784760000L +pruneOffset=157784760000 # timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS consumerTimeout=300000 \ No newline at end of file diff --git a/rpm/resources/egress.properties b/rpm/resources/egress.properties new file mode 100644 index 00000000..dbc5adf7 --- /dev/null +++ b/rpm/resources/egress.properties @@ -0,0 +1,16 @@ +# HDFS uri +hdfsuri=hdfs://localhost:45937/ +# HDFS path +hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ +# Kerberos +java.security.krb5.kdc=test +java.security.krb5.realm=test +hadoop.security.authentication=kerberos +hadoop.security.authorization=test +dfs.namenode.kerberos.principal.pattern=test +KerberosKeytabUser=test +KerberosKeytabPath=test +dfs.client.use.datanode.hostname=false +hadoop.kerberos.keytab.login.autorenewal.enabled=true +dfs.data.transfer.protection=test +dfs.encrypt.data.transfer.cipher.suites=test \ No newline at end of file diff --git a/rpm/resources/ingress.properties b/rpm/resources/ingress.properties new file mode 100644 index 00000000..b23a2775 --- /dev/null +++ b/rpm/resources/ingress.properties @@ -0,0 +1,22 @@ +# Kafka security configuration file +java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas +# Kafka bootstrap servers +bootstrap.servers=test +# Offset, should not be touched +auto.offset.reset=earliest +# Autocommit, should not 
be touched +enable.auto.commit=false +# Consumer group id, this is to track the progress of reading the topic +group.id=cfe_39 +# Used security protocol and mechanism +security.protocol=SASL_PLAINTEXT +sasl.mechanism=PLAIN +# Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger +max.poll.records=500 +# How much data can be fetched in one go +fetch.max.bytes=1073741820 +# How long for request before timing out. Note that too big max poll records size can cause this to trigger +request.timeout.ms=300000 +max.poll.interval.ms=300000 +# For testing only +useMockKafkaConsumer=true diff --git a/src/test/resources/broken.application.properties b/src/test/resources/broken.application.properties index 586601f0..2c08db30 100644 --- a/src/test/resources/broken.application.properties +++ b/src/test/resources/broken.application.properties @@ -1,31 +1,13 @@ -# Kafka security configuration file -java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas # Logger settings -log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties +# log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties +# hdfs settings +#egress.configurationFile=/opt/teragrep/cfe_39/etc/egress.properties +# kafka settings +#ingress.configurationFile=/opt/teragrep/cfe_39/etc/ingress.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ # Number of consumers created to the consumer groups -numOfConsumers=2 -# Kafka bootstrap servers -bootstrap.servers=test -# Offset, should not be touched -auto.offset.reset=earliest -# Autocommit, should not be touched -enable.auto.commit=false -# Consumer group id, this is to track the progress of reading hte topic -group.id=cfe_39 -# Used security protocol and mechanism -security.protocol=SASL_PLAINTEXT -sasl.mechanism=PLAIN -# Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger 
-max.poll.records=500 -# How much data can be fetched in one go -fetch.max.bytes=1073741820 -# How long for request before timing out. Note that too big max poll records size can cause this to trigger -request.timeout.ms=300000 -max.poll.interval.ms=300000 -# For testing only -useMockKafkaConsumer=true +# numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # The maximum file size for AVRO-files that are to be stored in HDFS database. @@ -36,19 +18,5 @@ skipNonRFC5424Records=true skipEmptyRFC5424Records=true # HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 157784760000L pruneOffset=157784760000 -# HDFS path -hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ -# Kerberos -java.security.krb5.kdc=test -java.security.krb5.realm=test -hadoop.security.authentication=test -hadoop.security.authorization=test -dfs.namenode.kerberos.principal.pattern=test -KerberosKeytabUser=test -KerberosKeytabPath=test -dfs.client.use.datanode.hostname=false -kerberosLoginAutorenewal=true -dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test # timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS consumerTimeout=300000 \ No newline at end of file diff --git a/src/test/resources/failProcessing.application.properties b/src/test/resources/failProcessing.application.properties index bdaf3507..2a6d8e7b 100644 --- a/src/test/resources/failProcessing.application.properties +++ b/src/test/resources/failProcessing.application.properties @@ -1,31 +1,13 @@ -# Kafka security configuration file -java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas # Logger settings -log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties +# log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties +# hdfs settings +#egress.configurationFile=/opt/teragrep/cfe_39/etc/egress.properties +# kafka settings 
+#ingress.configurationFile=/opt/teragrep/cfe_39/etc/ingress.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ # Number of consumers created to the consumer groups numOfConsumers=2 -# Kafka bootstrap servers -bootstrap.servers=test -# Offset, should not be touched -auto.offset.reset=earliest -# Autocommit, should not be touched -enable.auto.commit=false -# Consumer group id, this is to track the progress of reading hte topic -group.id=cfe_39 -# Used security protocol and mechanism -security.protocol=SASL_PLAINTEXT -sasl.mechanism=PLAIN -# Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger -max.poll.records=500 -# How much data can be fetched in one go -fetch.max.bytes=1073741820 -# How long for request before timing out. Note that too big max poll records size can cause this to trigger -request.timeout.ms=300000 -max.poll.interval.ms=300000 -# For testing only -useMockKafkaConsumer=true # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # The maximum file size for AVRO-files that are to be stored in HDFS database. @@ -36,21 +18,5 @@ skipNonRFC5424Records=false skipEmptyRFC5424Records=false # HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 
157784760000L pruneOffset=157784760000 -# HDFS uri -hdfsuri=hdfs://localhost:45937/ -# HDFS path -hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ -# Kerberos -java.security.krb5.kdc=test -java.security.krb5.realm=test -hadoop.security.authentication=test -hadoop.security.authorization=test -dfs.namenode.kerberos.principal.pattern=test -KerberosKeytabUser=test -KerberosKeytabPath=test -dfs.client.use.datanode.hostname=false -kerberosLoginAutorenewal=true -dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test # timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS consumerTimeout=300000 \ No newline at end of file diff --git a/src/test/resources/largeFile.application.properties b/src/test/resources/largeFile.application.properties index 33162ea1..e0bd58f4 100644 --- a/src/test/resources/largeFile.application.properties +++ b/src/test/resources/largeFile.application.properties @@ -1,31 +1,13 @@ -# Kafka security configuration file -java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas # Logger settings -log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties +# log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties +# hdfs settings +#egress.configurationFile=/opt/teragrep/cfe_39/etc/egress.properties +# kafka settings +#ingress.configurationFile=/opt/teragrep/cfe_39/etc/ingress.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ # Number of consumers created to the consumer groups numOfConsumers=2 -# Kafka bootstrap servers -bootstrap.servers=test -# Offset, should not be touched -auto.offset.reset=earliest -# Autocommit, should not be touched -enable.auto.commit=false -# Consumer group id, this is to track the progress of reading hte topic -group.id=cfe_39 -# Used security protocol and mechanism -security.protocol=SASL_PLAINTEXT -sasl.mechanism=PLAIN -# Maximum records per batch, note that too big number will cause massive load 
and can cause timeouts to trigger -max.poll.records=500 -# How much data can be fetched in one go -fetch.max.bytes=1073741820 -# How long for request before timing out. Note that too big max poll records size can cause this to trigger -request.timeout.ms=300000 -max.poll.interval.ms=300000 -# For testing only -useMockKafkaConsumer=true # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # The maximum file size for AVRO-files that are to be stored in HDFS database. @@ -36,21 +18,5 @@ skipNonRFC5424Records=true skipEmptyRFC5424Records=true # HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 157784760000L pruneOffset=157784760000 -# HDFS uri -hdfsuri=hdfs://localhost:45937/ -# HDFS path -hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ -# Kerberos -java.security.krb5.kdc=test -java.security.krb5.realm=test -hadoop.security.authentication=test -hadoop.security.authorization=test -dfs.namenode.kerberos.principal.pattern=test -KerberosKeytabUser=test -KerberosKeytabPath=test -dfs.client.use.datanode.hostname=false -kerberosLoginAutorenewal=true -dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test # timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS consumerTimeout=300000 \ No newline at end of file diff --git a/src/test/resources/valid.application.properties b/src/test/resources/valid.application.properties index 19ea1b26..8f312cfa 100644 --- a/src/test/resources/valid.application.properties +++ b/src/test/resources/valid.application.properties @@ -1,31 +1,13 @@ -# Kafka security configuration file -java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas # Logger settings -log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties +# log4j2.configurationFile=/opt/teragrep/cfe_39/etc/log4j2.properties +# hdfs settings +#egress.configurationFile=/opt/teragrep/cfe_39/etc/egress.properties 
+# kafka settings +#ingress.configurationFile=/opt/teragrep/cfe_39/etc/ingress.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ # Number of consumers created to the consumer groups numOfConsumers=2 -# Kafka bootstrap servers -bootstrap.servers=test -# Offset, should not be touched -auto.offset.reset=earliest -# Autocommit, should not be touched -enable.auto.commit=false -# Consumer group id, this is to track the progress of reading hte topic -group.id=cfe_39 -# Used security protocol and mechanism -security.protocol=SASL_PLAINTEXT -sasl.mechanism=PLAIN -# Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger -max.poll.records=500 -# How much data can be fetched in one go -fetch.max.bytes=1073741820 -# How long for request before timing out. Note that too big max poll records size can cause this to trigger -request.timeout.ms=300000 -max.poll.interval.ms=300000 -# For testing only -useMockKafkaConsumer=true # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # The maximum file size for AVRO-files that are to be stored in HDFS database. @@ -36,21 +18,5 @@ skipNonRFC5424Records=true skipEmptyRFC5424Records=true # HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 
157784760000L pruneOffset=157784760000 -# HDFS uri -hdfsuri=hdfs://localhost:45937/ -# HDFS path -hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ -# Kerberos -java.security.krb5.kdc=test -java.security.krb5.realm=test -hadoop.security.authentication=test -hadoop.security.authorization=test -dfs.namenode.kerberos.principal.pattern=test -KerberosKeytabUser=test -KerberosKeytabPath=test -dfs.client.use.datanode.hostname=false -kerberosLoginAutorenewal=true -dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test # timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS consumerTimeout=300000 \ No newline at end of file diff --git a/src/test/resources/valid.hdfs.properties b/src/test/resources/valid.hdfs.properties new file mode 100644 index 00000000..864b019a --- /dev/null +++ b/src/test/resources/valid.hdfs.properties @@ -0,0 +1,18 @@ +# HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 
157784760000L +pruneOffset=157784760000 +# HDFS uri +hdfsuri=hdfs://localhost:45937/ +# HDFS path +hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/ +# Kerberos +java.security.krb5.kdc=test +java.security.krb5.realm=test +hadoop.security.authentication=kerberos +hadoop.security.authorization=test +dfs.namenode.kerberos.principal.pattern=test +KerberosKeytabUser=test +KerberosKeytabPath=test +dfs.client.use.datanode.hostname=false +hadoop.kerberos.keytab.login.autorenewal.enabled=true +dfs.data.transfer.protection=test +dfs.encrypt.data.transfer.cipher.suites=test \ No newline at end of file diff --git a/src/test/resources/valid.kafka.properties b/src/test/resources/valid.kafka.properties new file mode 100644 index 00000000..b167b3e3 --- /dev/null +++ b/src/test/resources/valid.kafka.properties @@ -0,0 +1,22 @@ +# Kafka security configuration file +java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas +# Kafka bootstrap servers +bootstrap.servers=test +# Offset, should not be touched +auto.offset.reset=earliest +# Autocommit, should not be touched +enable.auto.commit=false +# Consumer group id, this is to track the progress of reading the topic +group.id=cfe_39 +# Used security protocol and mechanism +security.protocol=SASL_PLAINTEXT +sasl.mechanism=PLAIN +# Maximum records per batch, note that too big number will cause massive load and can cause timeouts to trigger +max.poll.records=500 +# How much data can be fetched in one go +fetch.max.bytes=1073741820 +# How long for request before timing out. 
Note that too big max poll records size can cause this to trigger +request.timeout.ms=300000 +max.poll.interval.ms=300000 +# For testing only +useMockKafkaConsumer=true \ No newline at end of file From 74508d828000ef58dd84afd81c44392dc2352772 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 26 Sep 2024 16:23:53 +0300 Subject: [PATCH 53/77] Refactored Configuration.java, ConfigurationImpl.java and ConfigurationValidationImpl.java to support the multiple properties file implementation. --- .../cfe_39/configuration/Configuration.java | 9 +- .../configuration/ConfigurationImpl.java | 95 +++++++++++++------ .../ConfigurationValidationImpl.java | 46 +++------ 3 files changed, 81 insertions(+), 69 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java index 9d6f7727..0b55191d 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java @@ -46,18 +46,15 @@ package com.teragrep.cfe_39.configuration; import java.io.IOException; -import java.util.Properties; public interface Configuration { - ConfigurationImpl loadPropertiesFile() throws IOException; + void loadPropertiesFile(String configurationFile) throws IOException; - Configuration with(String key, String value); + void with(String key, String value); String valueOf(String key); - Properties toKafkaConsumerProperties(); - - void configureLogging() throws IOException; + boolean has(String key); } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java index e0768e92..2613a384 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java @@ -56,71 +56,104 @@ import java.nio.file.Paths; import 
java.util.Properties; +// This class will only hold the common configuration parameters. Rename to CommonConfiguration? public final class ConfigurationImpl implements Configuration { private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationImpl.class); + private final Properties properties; private final ConfigurationValidationImpl configurationValidationImpl; + private final HdfsConfiguration hdfsConfiguration; + private final KafkaConfiguration kafkaConfiguration; public ConfigurationImpl() { - this(new Properties()); + this(new Properties(), new HdfsConfiguration(), new KafkaConfiguration()); } - public ConfigurationImpl(Properties properties) { + // This approach should be fine. The passed properties can be used for sourcing the other properties files. + public ConfigurationImpl( + Properties properties, + HdfsConfiguration hdfsConfiguration, + KafkaConfiguration kafkaConfiguration + ) { this.properties = properties; - configurationValidationImpl = new ConfigurationValidationImpl(); + this.hdfsConfiguration = hdfsConfiguration; // Initializes HdfsConfiguration + this.kafkaConfiguration = kafkaConfiguration; // Initializes KafkaConfiguration + this.configurationValidationImpl = new ConfigurationValidationImpl(); } + // This method should load the common properties belonging to this configuration object, but it should also ask the other configuration objects to do the same. 
@Override - public ConfigurationImpl loadPropertiesFile() throws IOException { - final Properties newProperties = new Properties(); - Path configPath = Paths - .get(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + public void loadPropertiesFile(String configurationFile) throws IOException { + Path configPath = Paths.get(configurationFile); LOGGER.info("Loading application config <[{}]>", configPath.toAbsolutePath()); try (InputStream inputStream = Files.newInputStream(configPath)) { - newProperties.load(inputStream); - LOGGER.debug("Got configuration: <{}>", newProperties); + properties.load(inputStream); + LOGGER.debug("Got configuration: <{}>", properties); + configurationValidationImpl.validate(properties); } - configurationValidationImpl.validate(newProperties); - return new ConfigurationImpl(newProperties); + // also load the hdfs and kafka configuration files. + hdfsConfiguration + .loadPropertiesFile(properties.getProperty("egress.configurationFile", System.getProperty("user.dir") + "/rpm/resources/egress.properties")); + kafkaConfiguration + .loadPropertiesFile(properties.getProperty("ingress.configurationFile", System.getProperty("user.dir") + "/rpm/resources/ingress.properties")); + configureLogging(); } + // Used only during testing to change existing property values, make a fake for this. 
@Override - public ConfigurationImpl with(String key, String value) { - final Properties newProperties = new Properties(); - newProperties.putAll(properties); - newProperties.setProperty(key, value); - configurationValidationImpl.validate(newProperties); - return new ConfigurationImpl(newProperties); + public void with(String key, String value) { + if (this.has(key)) { + properties.setProperty(key, value); + configurationValidationImpl.validate(properties); + } + else if (hdfsConfiguration.has(key)) { + hdfsConfiguration.with(key, value); + } + else if (kafkaConfiguration.has(key)) { + kafkaConfiguration.with(key, value); + } + else { + throw new IllegalArgumentException("Key not found: " + key); + } } @Override public String valueOf(String key) { - if (properties.containsKey(key)) { + if (this.has(key)) { return properties.getProperty(key); } + if (kafkaConfiguration.has(key)) { + return kafkaConfiguration.valueOf(key); + } + if (hdfsConfiguration.has(key)) { + return hdfsConfiguration.valueOf(key); + } throw new IllegalArgumentException("Key not found: " + key); } @Override + public boolean has(String key) { + return properties.containsKey(key); + } + public Properties toKafkaConsumerProperties() { Properties kafkaProperties = new Properties(); - kafkaProperties.put("bootstrap.servers", valueOf("bootstrap.servers")); - kafkaProperties.put("auto.offset.reset", valueOf("auto.offset.reset")); - kafkaProperties.put("enable.auto.commit", valueOf("enable.auto.commit")); - kafkaProperties.put("group.id", valueOf("group.id")); - kafkaProperties.put("security.protocol", valueOf("security.protocol")); - kafkaProperties.put("sasl.mechanism", valueOf("sasl.mechanism")); - kafkaProperties.put("max.poll.records", valueOf("max.poll.records")); - kafkaProperties.put("fetch.max.bytes", valueOf("fetch.max.bytes")); - kafkaProperties.put("request.timeout.ms", valueOf("request.timeout.ms")); - kafkaProperties.put("max.poll.interval.ms", valueOf("max.poll.interval.ms")); - 
kafkaProperties.put("useMockKafkaConsumer", valueOf("useMockKafkaConsumer")); + kafkaProperties.put("bootstrap.servers", kafkaConfiguration.valueOf("bootstrap.servers")); + kafkaProperties.put("auto.offset.reset", kafkaConfiguration.valueOf("auto.offset.reset")); + kafkaProperties.put("enable.auto.commit", kafkaConfiguration.valueOf("enable.auto.commit")); + kafkaProperties.put("group.id", kafkaConfiguration.valueOf("group.id")); + kafkaProperties.put("security.protocol", kafkaConfiguration.valueOf("security.protocol")); + kafkaProperties.put("sasl.mechanism", kafkaConfiguration.valueOf("sasl.mechanism")); + kafkaProperties.put("max.poll.records", kafkaConfiguration.valueOf("max.poll.records")); + kafkaProperties.put("fetch.max.bytes", kafkaConfiguration.valueOf("fetch.max.bytes")); + kafkaProperties.put("request.timeout.ms", kafkaConfiguration.valueOf("request.timeout.ms")); + kafkaProperties.put("max.poll.interval.ms", kafkaConfiguration.valueOf("max.poll.interval.ms")); + kafkaProperties.put("useMockKafkaConsumer", kafkaConfiguration.valueOf("useMockKafkaConsumer")); return kafkaProperties; } - @Override - public void configureLogging() throws IOException { + private void configureLogging() throws IOException { // Just for loggers to work Path log4j2Config = Paths .get(properties.getProperty("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties")); diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java index e11c93b6..654306ca 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java @@ -54,26 +54,30 @@ public final class ConfigurationValidationImpl implements ConfigurationValidatio private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationValidationImpl.class); private 
final Set requiredKeys; + private final Set optionalKeys; public ConfigurationValidationImpl() { this.requiredKeys = new HashSet<>(); + this.optionalKeys = new HashSet<>(); } + @Override public void validate(Properties properties) { validateKeys(properties); validateValues(properties); } private void validateKeys(Properties properties) { - if (requiredKeys.isEmpty()) { + if (requiredKeys.isEmpty() && optionalKeys.isEmpty()) { loadRequiredKeys(); + loadOptionalKeys(); } int requiredCount = 0; for (Map.Entry keyValuePair : properties.entrySet()) { if (requiredKeys.contains(keyValuePair.getKey().toString())) { requiredCount++; } - else { + else if (!optionalKeys.contains(keyValuePair.getKey().toString())) { throw new IllegalStateException("Unauthorized key " + keyValuePair.getKey().toString()); } } @@ -111,7 +115,7 @@ private void validateValues(Properties properties) { } private void loadRequiredKeys() { - // Common + // Required keys requiredKeys.add("pruneOffset"); requiredKeys.add("queueDirectory"); requiredKeys.add("maximumFileSize"); @@ -120,35 +124,13 @@ private void loadRequiredKeys() { requiredKeys.add("consumerTimeout"); requiredKeys.add("skipNonRFC5424Records"); requiredKeys.add("skipEmptyRFC5424Records"); - requiredKeys.add("log4j2.configurationFile"); - // kafka - requiredKeys.add("java.security.auth.login.config"); - requiredKeys.add("bootstrap.servers"); - requiredKeys.add("auto.offset.reset"); - requiredKeys.add("enable.auto.commit"); - requiredKeys.add("group.id"); - requiredKeys.add("security.protocol"); - requiredKeys.add("sasl.mechanism"); - requiredKeys.add("max.poll.records"); - requiredKeys.add("fetch.max.bytes"); - requiredKeys.add("request.timeout.ms"); - requiredKeys.add("max.poll.interval.ms"); - requiredKeys.add("useMockKafkaConsumer"); - // HDFS - requiredKeys.add("hdfsPath"); - requiredKeys.add("hdfsuri"); - requiredKeys.add("dfs.client.use.datanode.hostname"); - requiredKeys.add("dfs.data.transfer.protection"); - 
requiredKeys.add("dfs.encrypt.data.transfer.cipher.suites"); - // Kerberos - requiredKeys.add("hadoop.security.authentication"); - requiredKeys.add("hadoop.security.authorization"); - requiredKeys.add("dfs.namenode.kerberos.principal.pattern"); - requiredKeys.add("java.security.krb5.kdc"); - requiredKeys.add("java.security.krb5.realm"); - requiredKeys.add("KerberosKeytabUser"); - requiredKeys.add("KerberosKeytabPath"); - requiredKeys.add("kerberosLoginAutorenewal"); + } + + private void loadOptionalKeys() { + // Optional keys that have default values in place. + optionalKeys.add("log4j2.configurationFile"); + optionalKeys.add("ingress.configurationFile"); + optionalKeys.add("egress.configurationFile"); } } From afd454514fd548a11ba227026179d26a76a0807b Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 26 Sep 2024 16:26:00 +0300 Subject: [PATCH 54/77] Refactored Main.java and all the tests to use the refactored configuration classes. --- src/main/java/com/teragrep/cfe_39/Main.java | 9 ++- .../cfe_39/BatchDistributionTest.java | 16 ++-- .../teragrep/cfe_39/ConfigurationTest.java | 75 ++++++++++++++----- .../java/com/teragrep/cfe_39/HdfsTest.java | 16 ++-- .../cfe_39/Ingestion0FilesLowSizeTest.java | 19 +++-- .../teragrep/cfe_39/Ingestion0FilesTest.java | 19 +++-- .../cfe_39/Ingestion1Old1NewFileTest.java | 19 +++-- .../cfe_39/Ingestion2NewFilesTest.java | 20 +++-- .../cfe_39/Ingestion2OldFilesTest.java | 19 +++-- .../teragrep/cfe_39/KafkaConsumerTest.java | 19 +++-- .../cfe_39/ProcessingFailureTest.java | 17 +++-- .../teragrep/cfe_39/PruningNoFilesTest.java | 17 +++-- .../cfe_39/PruningOneNewFileTest.java | 17 +++-- .../cfe_39/PruningOneOldFileTest.java | 17 +++-- .../cfe_39/PruningOneOldOneNewFileTest.java | 17 +++-- .../cfe_39/PruningTwoNewFilesTest.java | 17 +++-- .../cfe_39/PruningTwoOldFilesTest.java | 17 +++-- .../teragrep/cfe_39/SyslogAvroWriterTest.java | 15 ++-- 18 files changed, 244 insertions(+), 121 deletions(-) diff --git 
a/src/main/java/com/teragrep/cfe_39/Main.java b/src/main/java/com/teragrep/cfe_39/Main.java index c6f3aa31..836be54a 100644 --- a/src/main/java/com/teragrep/cfe_39/Main.java +++ b/src/main/java/com/teragrep/cfe_39/Main.java @@ -59,8 +59,13 @@ public final class Main { public static void main(String[] args) throws Exception { ConfigurationImpl config = new ConfigurationImpl(); try { - config = config.loadPropertiesFile(); - config.configureLogging(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); } catch (IOException e) { LOGGER.error("Can't load config: ", e); diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 1df64890..3a9380e6 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -92,15 +92,19 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } diff --git a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java index 5b71bb5a..239a0f21 100644 --- a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java @@ -65,7 +65,9 @@ public void kafkaPropertiesConfigurationTest() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - ConfigurationImpl configuration = new ConfigurationImpl().loadPropertiesFile(); + ConfigurationImpl configuration = new ConfigurationImpl(); + configuration + .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); Properties readerKafkaProperties = configuration.toKafkaConsumerProperties(); // Test extracting useMockKafkaConsumer value from config. boolean useMockKafkaConsumer = Boolean @@ -81,9 +83,11 @@ public void brokenConfigurationTest() { System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/broken.application.properties"); Exception e = Assertions.assertThrows(Exception.class, () -> { - ConfigurationImpl configuration = new ConfigurationImpl().loadPropertiesFile(); + ConfigurationImpl configuration = new ConfigurationImpl(); + configuration + .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/broken.application.properties"); }); - Assertions.assertEquals("Missing required key hdfsuri", e.getMessage()); + Assertions.assertEquals("Missing required key numOfConsumers", e.getMessage()); } @Test @@ -92,15 +96,23 @@ public void configurationEqualityTest() { System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); assertDoesNotThrow(() -> { - ConfigurationImpl configuration1 = new ConfigurationImpl().loadPropertiesFile(); - ConfigurationImpl configuration2 = new ConfigurationImpl().loadPropertiesFile(); - ConfigurationImpl configuration3 = new ConfigurationImpl().loadPropertiesFile(); - ConfigurationImpl configuration4 = new ConfigurationImpl().loadPropertiesFile(); + ConfigurationImpl configuration1 = new ConfigurationImpl(); + configuration1 + .loadPropertiesFile(System.getProperty("user.dir") + 
"/src/test/resources/valid.application.properties"); + ConfigurationImpl configuration2 = new ConfigurationImpl(); + configuration2 + .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + ConfigurationImpl configuration3 = new ConfigurationImpl(); + configuration3 + .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + ConfigurationImpl configuration4 = new ConfigurationImpl(); + configuration4 + .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); Assertions.assertNotEquals(configuration1, configuration2); Assertions.assertNotEquals(configuration1, configuration3); Assertions.assertNotEquals(configuration3, configuration4); - configuration3 = configuration3.with("hdfsuri", "12345"); - configuration4 = configuration4.with("hdfsuri", "12345"); + configuration3.with("hdfsuri", "12345"); + configuration4.with("hdfsuri", "12345"); Assertions.assertNotEquals(configuration1, configuration3); Assertions.assertNotEquals(configuration3, configuration4); }); @@ -112,8 +124,23 @@ public void configurationWithTest() { System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); assertDoesNotThrow(() -> { - ConfigurationImpl configuration1 = new ConfigurationImpl().loadPropertiesFile(); - ConfigurationImpl configuration2 = new ConfigurationImpl().loadPropertiesFile().with("hdfsuri", "12345"); + ConfigurationImpl configuration1 = new ConfigurationImpl(); + configuration1 + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); + ConfigurationImpl configuration2 = new ConfigurationImpl(); + configuration2 + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); + configuration2.with("hdfsuri", 
"12345"); Assertions.assertEquals(configuration1.valueOf("hdfsuri"), "hdfs://localhost:45937/"); Assertions.assertEquals(configuration2.valueOf("hdfsuri"), "12345"); }); @@ -124,12 +151,18 @@ public void configurationWithFailTest() { // Set system properties to use the valid configuration. System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - Exception e = Assertions.assertThrows(IllegalStateException.class, () -> { - ConfigurationImpl configuration = new ConfigurationImpl() - .loadPropertiesFile() - .with("unauthorized_key", "12345"); + Exception e = Assertions.assertThrows(IllegalArgumentException.class, () -> { + ConfigurationImpl configuration = new ConfigurationImpl(); + configuration + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); + configuration.with("unauthorized_key", "12345"); }); - Assertions.assertEquals("Unauthorized key unauthorized_key", e.getMessage()); + Assertions.assertEquals("Key not found: unauthorized_key", e.getMessage()); } @Test @@ -138,7 +171,15 @@ public void configurationWithFailTest2() { System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); Exception e = Assertions.assertThrows(IllegalArgumentException.class, () -> { - ConfigurationImpl configuration = new ConfigurationImpl().loadPropertiesFile().with("maximumFileSize", "0"); + ConfigurationImpl configuration = new ConfigurationImpl(); + configuration + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); + configuration.with("maximumFileSize", "0"); }); Assertions.assertEquals("maximumFileSize must be set to >0, got 0", e.getMessage()); } diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index 
04623fed..a0262c1b 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -80,15 +80,19 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java index 82fcbfb8..f5ad6f91 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java @@ -82,16 +82,21 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("maximumFileSize", "3000"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("maximumFileSize", "3000"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index c80213ef..304274d4 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -80,16 +80,21 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("maximumFileSize", "30000"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("maximumFileSize", "30000"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index 35135b9c..d6ce8db3 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -86,16 +86,21 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("maximumFileSize", "30000"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("maximumFileSize", "30000"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts pre-made avro-files to HDFS where one file has new timestamp and other old, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index a313409a..a7bc0a28 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -84,17 +84,21 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("maximumFileSize", "30000"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); - + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("maximumFileSize", "30000"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index 579e883e..810cfcbc 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -86,16 +86,21 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("maximumFileSize", "30000"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("maximumFileSize", "30000"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index 26764366..d9c75ed4 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -84,16 +84,21 @@ public void startMiniCluster() { // Set system properties to use the valid configuration with skipping of broken records disabled. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("maximumFileSize", "30000"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("maximumFileSize", "30000"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index df84c7da..7b2b413f 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -92,15 +92,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration with skipping of broken records disabled. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/failProcessing.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); } diff --git a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java index 1398663b..d4781375 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java @@ -78,15 +78,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); }); diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java index a855fdfa..6261b97e 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java @@ -82,15 +82,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts a single pre-made avro-file with a new timestamp to HDFS, which is normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java index ce65bcd7..964f830f 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java @@ -82,15 +82,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts a single pre-made avro-file with an olf timestamp to HDFS, which is normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java index 31dfce14..14f557c1 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java @@ -82,15 +82,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); /* Inserts pre-made avro-files to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java index fa63cc0d..ceb7f692 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java @@ -82,15 +82,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java index 2a089fa8..c63fab5e 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java @@ -82,15 +82,20 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config = config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java index fba0b547..4741ea1d 100644 --- a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java +++ b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java @@ -74,11 +74,16 @@ public void startMiniCluster() { // Set system properties to use the valid configuration. 
System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl().loadPropertiesFile(); - config = config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config = config - .with("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); - config.configureLogging(); + config = new ConfigurationImpl(); + config + .loadPropertiesFile( + System + .getProperty( + "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" + ) + ); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); }); } From 1f632749f4146276a910dacf6e4c3af81fdff727 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 27 Sep 2024 11:33:45 +0300 Subject: [PATCH 55/77] Removed toKafkaConsumerProperties() from ConfigurationImpl and moved the code for constructing kafka Properties to where it is used. 
--- .../configuration/ConfigurationImpl.java | 21 ++----------------- .../consumers/kafka/HdfsDataIngestion.java | 18 ++++++++++++---- .../consumers/kafka/ReadCoordinator.java | 17 ++++++++++++--- .../teragrep/cfe_39/ConfigurationTest.java | 6 +----- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java index 2613a384..5177aebe 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java @@ -63,14 +63,13 @@ public final class ConfigurationImpl implements Configuration { private final Properties properties; private final ConfigurationValidationImpl configurationValidationImpl; - private final HdfsConfiguration hdfsConfiguration; - private final KafkaConfiguration kafkaConfiguration; + private final Configuration hdfsConfiguration; + private final Configuration kafkaConfiguration; public ConfigurationImpl() { this(new Properties(), new HdfsConfiguration(), new KafkaConfiguration()); } - // This approach should be fine. The passed properties can be used for sourcing the other properties files. 
public ConfigurationImpl( Properties properties, HdfsConfiguration hdfsConfiguration, @@ -137,22 +136,6 @@ public boolean has(String key) { return properties.containsKey(key); } - public Properties toKafkaConsumerProperties() { - Properties kafkaProperties = new Properties(); - kafkaProperties.put("bootstrap.servers", kafkaConfiguration.valueOf("bootstrap.servers")); - kafkaProperties.put("auto.offset.reset", kafkaConfiguration.valueOf("auto.offset.reset")); - kafkaProperties.put("enable.auto.commit", kafkaConfiguration.valueOf("enable.auto.commit")); - kafkaProperties.put("group.id", kafkaConfiguration.valueOf("group.id")); - kafkaProperties.put("security.protocol", kafkaConfiguration.valueOf("security.protocol")); - kafkaProperties.put("sasl.mechanism", kafkaConfiguration.valueOf("sasl.mechanism")); - kafkaProperties.put("max.poll.records", kafkaConfiguration.valueOf("max.poll.records")); - kafkaProperties.put("fetch.max.bytes", kafkaConfiguration.valueOf("fetch.max.bytes")); - kafkaProperties.put("request.timeout.ms", kafkaConfiguration.valueOf("request.timeout.ms")); - kafkaProperties.put("max.poll.interval.ms", kafkaConfiguration.valueOf("max.poll.interval.ms")); - kafkaProperties.put("useMockKafkaConsumer", kafkaConfiguration.valueOf("useMockKafkaConsumer")); - return kafkaProperties; - } - private void configureLogging() throws IOException { // Just for loggers to work Path log4j2Config = Paths diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index dc829cf7..85f37d5f 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -79,16 +79,26 @@ public final class HdfsDataIngestion { public HdfsDataIngestion(ConfigurationImpl config) throws IOException { this.config = config; - Properties readerKafkaProperties = 
config.toKafkaConsumerProperties(); this.numOfConsumers = Integer.parseInt(config.valueOf("numOfConsumers")); - this.useMockKafkaConsumer = Boolean - .parseBoolean(readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")); + this.useMockKafkaConsumer = Boolean.parseBoolean(config.valueOf("useMockKafkaConsumer")); if (useMockKafkaConsumer) { this.kafkaConsumer = MockKafkaConsumerFactory.getConsumer(0); // A consumer used only for scanning the available topics to be allocated to consumers running in different threads (thus 0 as input parameter). } else { + Properties kafkaProperties = new Properties(); + kafkaProperties.put("bootstrap.servers", config.valueOf("bootstrap.servers")); + kafkaProperties.put("auto.offset.reset", config.valueOf("auto.offset.reset")); + kafkaProperties.put("enable.auto.commit", config.valueOf("enable.auto.commit")); + kafkaProperties.put("group.id", config.valueOf("group.id")); + kafkaProperties.put("security.protocol", config.valueOf("security.protocol")); + kafkaProperties.put("sasl.mechanism", config.valueOf("sasl.mechanism")); + kafkaProperties.put("max.poll.records", config.valueOf("max.poll.records")); + kafkaProperties.put("fetch.max.bytes", config.valueOf("fetch.max.bytes")); + kafkaProperties.put("request.timeout.ms", config.valueOf("request.timeout.ms")); + kafkaProperties.put("max.poll.interval.ms", config.valueOf("max.poll.interval.ms")); + kafkaProperties.put("useMockKafkaConsumer", config.valueOf("useMockKafkaConsumer")); this.kafkaConsumer = new KafkaConsumer<>( - config.toKafkaConsumerProperties(), + kafkaProperties, new ByteArrayDeserializer(), new ByteArrayDeserializer() ); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java index 1b0ff972..7fa236f0 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java @@ 
-141,11 +141,22 @@ else if (Objects.equals(name, "testConsumerTopic2")) { // Part or Runnable implementation, called when the thread is started. @Override public void run() { - boolean useMockKafkaConsumer = Boolean - .parseBoolean(config.toKafkaConsumerProperties().getProperty("useMockKafkaConsumer", "false")); + boolean useMockKafkaConsumer = Boolean.parseBoolean(config.valueOf("useMockKafkaConsumer")); + Properties kafkaProperties = new Properties(); + kafkaProperties.put("bootstrap.servers", config.valueOf("bootstrap.servers")); + kafkaProperties.put("auto.offset.reset", config.valueOf("auto.offset.reset")); + kafkaProperties.put("enable.auto.commit", config.valueOf("enable.auto.commit")); + kafkaProperties.put("group.id", config.valueOf("group.id")); + kafkaProperties.put("security.protocol", config.valueOf("security.protocol")); + kafkaProperties.put("sasl.mechanism", config.valueOf("sasl.mechanism")); + kafkaProperties.put("max.poll.records", config.valueOf("max.poll.records")); + kafkaProperties.put("fetch.max.bytes", config.valueOf("fetch.max.bytes")); + kafkaProperties.put("request.timeout.ms", config.valueOf("request.timeout.ms")); + kafkaProperties.put("max.poll.interval.ms", config.valueOf("max.poll.interval.ms")); + kafkaProperties.put("useMockKafkaConsumer", config.valueOf("useMockKafkaConsumer")); try ( KafkaReader kafkaReader = createKafkaReader( - config.toKafkaConsumerProperties(), queueTopic, callbackFunction, useMockKafkaConsumer + kafkaProperties, queueTopic, callbackFunction, useMockKafkaConsumer ) ) { while (true) { diff --git a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java index 239a0f21..8561d063 100644 --- a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java @@ -51,8 +51,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Properties; - import static 
org.junit.jupiter.api.Assertions.assertDoesNotThrow; public class ConfigurationTest { @@ -68,10 +66,8 @@ public void kafkaPropertiesConfigurationTest() { ConfigurationImpl configuration = new ConfigurationImpl(); configuration .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - Properties readerKafkaProperties = configuration.toKafkaConsumerProperties(); // Test extracting useMockKafkaConsumer value from config. - boolean useMockKafkaConsumer = Boolean - .parseBoolean(readerKafkaProperties.getProperty("useMockKafkaConsumer", "false")); + boolean useMockKafkaConsumer = Boolean.parseBoolean(configuration.valueOf("useMockKafkaConsumer")); Assertions.assertTrue(useMockKafkaConsumer); LOGGER.debug("useMockKafkaConsumer: {}", useMockKafkaConsumer); }); From d3b1a800b3ae0c2a4f46bc9a499a2d590623497d Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 27 Sep 2024 13:24:59 +0300 Subject: [PATCH 56/77] Renamed WritableQueue.java to UniqueFileCreated.java. 
--- .../teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java | 6 +++--- .../queue/{WritableQueue.java => UniqueFileCreated.java} | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) rename src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/{WritableQueue.java => UniqueFileCreated.java} (94%) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index de233ff8..c02042e2 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -48,7 +48,7 @@ import com.google.gson.JsonObject; import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.ConfigurationImpl; -import com.teragrep.cfe_39.consumers.kafka.queue.WritableQueue; +import com.teragrep.cfe_39.consumers.kafka.queue.UniqueFileCreated; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -68,11 +68,11 @@ public final class PartitionFileImpl implements PartitionFile { private final PartitionRecordsImpl partitionRecords; PartitionFileImpl(ConfigurationImpl config, JsonObject topicPartition) throws IOException { - WritableQueue writableQueue = new WritableQueue( + UniqueFileCreated uniqueFileCreated = new UniqueFileCreated( config.valueOf("queueDirectory"), topicPartition.get("topic").getAsString() + topicPartition.get("partition").getAsString() ); - this.syslogFile = writableQueue.getNextWritableFile(); + this.syslogFile = uniqueFileCreated.getNextWritableFile(); this.config = config; this.topicPartition = topicPartition; this.batchOffsets = new ArrayList<>(); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/WritableQueue.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/UniqueFileCreated.java similarity index 94% rename from src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/WritableQueue.java rename to 
src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/UniqueFileCreated.java index 7cda8169..9f37fd01 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/WritableQueue.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/UniqueFileCreated.java @@ -60,14 +60,15 @@ import java.util.function.ToLongFunction; import java.util.stream.Stream; -public class WritableQueue { +// UniqueFileCreated responsibility is to create a new File object that doesn't interfere with any existing files on the given directory. +public class UniqueFileCreated { - private static final Logger LOGGER = LoggerFactory.getLogger(WritableQueue.class); + private static final Logger LOGGER = LoggerFactory.getLogger(UniqueFileCreated.class); private final Path queueDirectory; private final String queueNamePrefix; - public WritableQueue(String queueDirectory, String queueNamePrefix) { + public UniqueFileCreated(String queueDirectory, String queueNamePrefix) { this.queueDirectory = Paths.get(queueDirectory); this.queueNamePrefix = queueNamePrefix; if (!Files.isDirectory(this.queueDirectory)) { From 5f49d37ed639a87998381fb8833d0bca63d29708 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 27 Sep 2024 15:41:34 +0300 Subject: [PATCH 57/77] Implemented dropwizard metrics Timer for measuring performance. 
--- .../cfe_39/consumers/kafka/BatchDistributionImpl.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index 343f9c94..e8799bed 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -51,6 +51,7 @@ import com.teragrep.cfe_39.metrics.DurationStatistics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.codahale.metrics.Timer; import java.io.*; import java.time.Instant; @@ -101,8 +102,8 @@ public void accept(List batch) { ); } long batchBytes = 0L; - long start = Instant.now().toEpochMilli(); - // Starts measuring performance here. Measures how long it takes to process the whole batch. + Timer timer = new Timer(); + Timer.Context context = timer.time(); // Distribute the records of the batch to a PartitionFileImpl object based on partition from which the record originates from. ListIterator recordOffsetListIterator = batch.listIterator(); @@ -138,9 +139,8 @@ public void accept(List batch) { } }); - // Measures performance of code that is between start and end. - long end = Instant.now().toEpochMilli(); - long took = (end - start); + // Measure performance. + long took = context.stop() / 1000000L; // Convert nanoseconds to milliseconds. topicCounter.setDatabaseLatency(took); if (took == 0) { took = 1; From e3ce72ea78337e8fb16e600240df0dbf385122f7 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 1 Oct 2024 11:14:08 +0300 Subject: [PATCH 58/77] Renamed Configuration.java loadPropertiesFile(String configurationFile) to load(String configurationPath). 
--- src/main/java/com/teragrep/cfe_39/Main.java | 7 +-- .../cfe_39/configuration/Configuration.java | 2 +- .../configuration/ConfigurationImpl.java | 10 ++-- .../configuration/HdfsConfiguration.java | 4 +- .../configuration/KafkaConfiguration.java | 4 +- .../cfe_39/BatchDistributionTest.java | 7 +-- .../teragrep/cfe_39/ConfigurationTest.java | 46 ++++--------------- .../java/com/teragrep/cfe_39/HdfsTest.java | 7 +-- .../cfe_39/Ingestion0FilesLowSizeTest.java | 7 +-- .../teragrep/cfe_39/Ingestion0FilesTest.java | 7 +-- .../cfe_39/Ingestion1Old1NewFileTest.java | 7 +-- .../cfe_39/Ingestion2NewFilesTest.java | 7 +-- .../cfe_39/Ingestion2OldFilesTest.java | 7 +-- .../teragrep/cfe_39/KafkaConsumerTest.java | 7 +-- .../cfe_39/ProcessingFailureTest.java | 7 +-- .../teragrep/cfe_39/PruningNoFilesTest.java | 7 +-- .../cfe_39/PruningOneNewFileTest.java | 7 +-- .../cfe_39/PruningOneOldFileTest.java | 7 +-- .../cfe_39/PruningOneOldOneNewFileTest.java | 7 +-- .../cfe_39/PruningTwoNewFilesTest.java | 7 +-- .../cfe_39/PruningTwoOldFilesTest.java | 7 +-- .../teragrep/cfe_39/SyslogAvroWriterTest.java | 7 +-- 22 files changed, 37 insertions(+), 148 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/Main.java b/src/main/java/com/teragrep/cfe_39/Main.java index 836be54a..244242af 100644 --- a/src/main/java/com/teragrep/cfe_39/Main.java +++ b/src/main/java/com/teragrep/cfe_39/Main.java @@ -60,12 +60,7 @@ public static void main(String[] args) throws Exception { ConfigurationImpl config = new ConfigurationImpl(); try { config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); } catch (IOException e) { LOGGER.error("Can't load config: ", e); diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java 
index 0b55191d..e4176507 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java @@ -49,7 +49,7 @@ public interface Configuration { - void loadPropertiesFile(String configurationFile) throws IOException; + void load(String configurationPath) throws IOException; void with(String key, String value); diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java index 5177aebe..e5fdd6d9 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java @@ -56,7 +56,7 @@ import java.nio.file.Paths; import java.util.Properties; -// This class will only hold the common configuration parameters. Rename to CommonConfiguration? +// This class will only hold the common configuration parameters. public final class ConfigurationImpl implements Configuration { private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationImpl.class); @@ -83,8 +83,8 @@ public ConfigurationImpl( // This method should load the common properties belonging to this configuration object, but it should also ask the other configuration objects to do the same. @Override - public void loadPropertiesFile(String configurationFile) throws IOException { - Path configPath = Paths.get(configurationFile); + public void load(String configurationPath) throws IOException { + Path configPath = Paths.get(configurationPath); LOGGER.info("Loading application config <[{}]>", configPath.toAbsolutePath()); try (InputStream inputStream = Files.newInputStream(configPath)) { properties.load(inputStream); @@ -93,9 +93,9 @@ public void loadPropertiesFile(String configurationFile) throws IOException { } // also load the hdfs and kafka configuration files. 
hdfsConfiguration - .loadPropertiesFile(properties.getProperty("egress.configurationFile", System.getProperty("user.dir") + "/rpm/resources/egress.properties")); + .load(properties.getProperty("egress.configurationFile", System.getProperty("user.dir") + "/rpm/resources/egress.properties")); kafkaConfiguration - .loadPropertiesFile(properties.getProperty("ingress.configurationFile", System.getProperty("user.dir") + "/rpm/resources/ingress.properties")); + .load(properties.getProperty("ingress.configurationFile", System.getProperty("user.dir") + "/rpm/resources/ingress.properties")); configureLogging(); } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java index 942df511..ae345f7e 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java @@ -67,8 +67,8 @@ public HdfsConfiguration() { } @Override - public void loadPropertiesFile(String configurationFile) throws IOException { - Path configPath = Paths.get(configurationFile); + public void load(String configurationPath) throws IOException { + Path configPath = Paths.get(configurationPath); LOGGER.info("Loading hdfs config <[{}]>", configPath.toAbsolutePath()); try (InputStream inputStream = Files.newInputStream(configPath)) { properties.load(inputStream); diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java index c70114d7..8f632c2f 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java @@ -67,8 +67,8 @@ public KafkaConfiguration() { } @Override - public void loadPropertiesFile(String configurationFile) throws IOException { - Path configPath = Paths.get(configurationFile); + public void load(String 
configurationPath) throws IOException { + Path configPath = Paths.get(configurationPath); LOGGER.info("Loading hdfs config <[{}]>", configPath.toAbsolutePath()); try (InputStream inputStream = Files.newInputStream(configPath)) { properties.load(inputStream); diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 3a9380e6..d1755308 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -94,12 +94,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java index 8561d063..deaf85d7 100644 --- a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java @@ -64,8 +64,7 @@ public void kafkaPropertiesConfigurationTest() { System .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); ConfigurationImpl configuration = new ConfigurationImpl(); - configuration - .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + configuration.load(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); // Test extracting 
useMockKafkaConsumer value from config. boolean useMockKafkaConsumer = Boolean.parseBoolean(configuration.valueOf("useMockKafkaConsumer")); Assertions.assertTrue(useMockKafkaConsumer); @@ -80,8 +79,7 @@ public void brokenConfigurationTest() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/broken.application.properties"); Exception e = Assertions.assertThrows(Exception.class, () -> { ConfigurationImpl configuration = new ConfigurationImpl(); - configuration - .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/broken.application.properties"); + configuration.load(System.getProperty("user.dir") + "/src/test/resources/broken.application.properties"); }); Assertions.assertEquals("Missing required key numOfConsumers", e.getMessage()); } @@ -93,17 +91,13 @@ public void configurationEqualityTest() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); assertDoesNotThrow(() -> { ConfigurationImpl configuration1 = new ConfigurationImpl(); - configuration1 - .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + configuration1.load(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); ConfigurationImpl configuration2 = new ConfigurationImpl(); - configuration2 - .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + configuration2.load(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); ConfigurationImpl configuration3 = new ConfigurationImpl(); - configuration3 - .loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + configuration3.load(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); ConfigurationImpl configuration4 = new ConfigurationImpl(); - configuration4 - 
.loadPropertiesFile(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + configuration4.load(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); Assertions.assertNotEquals(configuration1, configuration2); Assertions.assertNotEquals(configuration1, configuration3); Assertions.assertNotEquals(configuration3, configuration4); @@ -122,20 +116,10 @@ public void configurationWithTest() { assertDoesNotThrow(() -> { ConfigurationImpl configuration1 = new ConfigurationImpl(); configuration1 - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); ConfigurationImpl configuration2 = new ConfigurationImpl(); configuration2 - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); configuration2.with("hdfsuri", "12345"); Assertions.assertEquals(configuration1.valueOf("hdfsuri"), "hdfs://localhost:45937/"); Assertions.assertEquals(configuration2.valueOf("hdfsuri"), "12345"); @@ -150,12 +134,7 @@ public void configurationWithFailTest() { Exception e = Assertions.assertThrows(IllegalArgumentException.class, () -> { ConfigurationImpl configuration = new ConfigurationImpl(); configuration - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); configuration.with("unauthorized_key", "12345"); }); Assertions.assertEquals("Key not found: unauthorized_key", e.getMessage()); @@ -169,12 +148,7 @@ public void configurationWithFailTest2() { Exception e = 
Assertions.assertThrows(IllegalArgumentException.class, () -> { ConfigurationImpl configuration = new ConfigurationImpl(); configuration - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); configuration.with("maximumFileSize", "0"); }); Assertions.assertEquals("maximumFileSize must be set to >0, got 0", e.getMessage()); diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index a0262c1b..0cf02dff 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -82,12 +82,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java index f5ad6f91..4e948cda 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java @@ -84,12 +84,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - 
"cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index 304274d4..16e8f98d 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -82,12 +82,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index d6ce8db3..5d6099f6 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -88,12 +88,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + 
.load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index a7bc0a28..7749d83b 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -86,12 +86,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index 810cfcbc..598f7ae4 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -88,12 +88,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a 
HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index d9c75ed4..7a4d1804 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -86,12 +86,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 7b2b413f..df4fbc03 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -94,12 +94,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/failProcessing.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new 
TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java index d4781375..cc07e000 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java @@ -80,12 +80,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java index 6261b97e..a0ad14c4 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java @@ -84,12 +84,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java 
b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java index 964f830f..f07923d0 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java @@ -84,12 +84,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java index 14f557c1..c4f495a7 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java @@ -84,12 +84,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java index ceb7f692..f64d0f33 100644 --- 
a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java @@ -84,12 +84,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java index c63fab5e..c256790c 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java @@ -84,12 +84,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); diff --git a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java index 4741ea1d..7e4d0ef4 100644 --- a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java +++ 
b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java @@ -76,12 +76,7 @@ public void startMiniCluster() { .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); config = new ConfigurationImpl(); config - .loadPropertiesFile( - System - .getProperty( - "cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties" - ) - ); + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); config.with("hadoop.security.authentication", "false"); }); From f0d8de01e71a035d3eabbbac21f0f35d4ceaf0e1 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 29 Oct 2024 15:26:14 +0200 Subject: [PATCH 59/77] Checkstyle plugin implementation and Checkstyle code cleanup. --- pom.xml | 212 ++++++++++++++++++ .../cfe_39/configuration/Configuration.java | 8 +- .../ConfigurationValidation.java | 2 +- .../consumers/kafka/BatchDistribution.java | 2 +- .../consumers/kafka/FileSystemFactory.java | 2 +- .../cfe_39/consumers/kafka/HDFSWrite.java | 4 +- .../consumers/kafka/HdfsDataIngestion.java | 2 +- .../cfe_39/consumers/kafka/KafkaRecord.java | 6 +- .../kafka/MockKafkaConsumerFactory.java | 18 +- .../cfe_39/consumers/kafka/PartitionFile.java | 8 +- .../consumers/kafka/PartitionFileImpl.java | 4 +- .../consumers/kafka/PartitionRecords.java | 4 +- .../consumers/kafka/ReadCoordinator.java | 22 +- .../kafka/queue/UniqueFileCreated.java | 4 +- .../cfe_39/metrics/DurationStatistics.java | 4 +- 15 files changed, 260 insertions(+), 42 deletions(-) diff --git a/pom.xml b/pom.xml index 9160621e..44e62cfc 100644 --- a/pom.xml +++ b/pom.xml @@ -385,6 +385,218 @@ + + org.apache.maven.plugins + maven-checkstyle-plugin + 3.5.0 + + + + scan-errors + + check + + process-classes + + error + true + true + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + scan-warnings + + check + + process-classes + + warning + true + false + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java index e4176507..10f7bdac 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java @@ -49,12 +49,12 @@ public interface Configuration { - void load(String configurationPath) throws IOException; + public abstract void load(String configurationPath) throws IOException; - void with(String key, String value); + public abstract void with(String key, String value); - String valueOf(String key); + public abstract String valueOf(String key); - boolean has(String key); + public abstract boolean has(String key); } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java index 5463e15d..1b113983 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java @@ -49,6 +49,6 @@ public interface ConfigurationValidation { - void validate(Properties properties); + public abstract void validate(Properties properties); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java index 73365eaf..6f141b62 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java +++ 
b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistribution.java @@ -50,5 +50,5 @@ public interface BatchDistribution extends Consumer> { - void rebalance(); + public abstract void rebalance(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactory.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactory.java index 0cbf1acf..25562183 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactory.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactory.java @@ -51,6 +51,6 @@ public interface FileSystemFactory { - FileSystem create(boolean initializeUGI) throws IOException; + public abstract FileSystem create(boolean initializeUGI) throws IOException; } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java index 5fa1835c..ee8aa14b 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java @@ -98,8 +98,8 @@ public void commit(File syslogFile) throws IOException { LOGGER.debug("Target file <{}> doesn't exist, proceeding normally.", hdfswritepath); } - Path path = new Path(syslogFile.getPath()); - fs.copyFromLocalFile(path, hdfswritepath); + Path filePath = new Path(syslogFile.getPath()); + fs.copyFromLocalFile(filePath, hdfswritepath); LOGGER.debug("End Write file into hdfs"); LOGGER.info("\nFile committed to HDFS, file writepath should be: <{}>\n", hdfswritepath); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index 85f37d5f..dfc61d57 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -82,7 +82,7 @@ public HdfsDataIngestion(ConfigurationImpl config) throws IOException 
{ this.numOfConsumers = Integer.parseInt(config.valueOf("numOfConsumers")); this.useMockKafkaConsumer = Boolean.parseBoolean(config.valueOf("useMockKafkaConsumer")); if (useMockKafkaConsumer) { - this.kafkaConsumer = MockKafkaConsumerFactory.getConsumer(0); // A consumer used only for scanning the available topics to be allocated to consumers running in different threads (thus 0 as input parameter). + this.kafkaConsumer = new MockKafkaConsumerFactory(0).getConsumer(); // A consumer used only for scanning the available topics to be allocated to consumers running in different threads (thus 0 as input parameter). } else { Properties kafkaProperties = new Properties(); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java index 723d93ed..355a3e67 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java @@ -49,9 +49,9 @@ public interface KafkaRecord { - long size(); + public abstract long size(); - String offsetToJSON(); + public abstract String offsetToJSON(); - SyslogRecord toSyslogRecord(); + public abstract SyslogRecord toSyslogRecord(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java index 5cc24060..da94c876 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/MockKafkaConsumerFactory.java @@ -67,12 +67,14 @@ @VisibleForTesting public final class MockKafkaConsumerFactory { - final static private Logger LOGGER = LoggerFactory.getLogger(MockKafkaConsumerFactory.class); + private final Logger LOGGER = LoggerFactory.getLogger(MockKafkaConsumerFactory.class); + private final int threadNum; - private MockKafkaConsumerFactory() { + public 
MockKafkaConsumerFactory(int threadNumInput) { + this.threadNum = threadNumInput; } - private static void generateEvents(MockConsumer consumer, String topicName, int partition) { + private void generateEvents(MockConsumer consumer, String topicName, int partition) { consumer .addRecord( new ConsumerRecord<>( @@ -252,20 +254,20 @@ private static void generateEvents(MockConsumer consumer, String } // Can initialize topic scan with all partitions available when the input parameter is 0. Consumer is manually assigned to specific partitions depending on the threadnum parameter. For example on threadnum 1 consumer has odd numbered partitions assigned to it and threadnum 2 has the even numbered partitions. - public static Consumer getConsumer(int threadnum) { + public Consumer getConsumer() { LOGGER.warn("useMockKafkaConsumer is set, using MockKafkaConsumer"); int amountofloops = 10; // number of loops for adding partitions/records to the mock consumer topic. Each loop adds a new partition of 14 records. 17777 loops results in file size slightly above 64M. 10 loops is sized at 36,102 bits. 
final MockConsumer consumer; consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST); List topicPartitions = new ArrayList<>(); - LinkedHashMap beginningOffsets = new LinkedHashMap<>(); - LinkedHashMap endOffsets = new LinkedHashMap<>(); + Map beginningOffsets = new HashMap<>(); + Map endOffsets = new HashMap<>(); List mockPartitionInfo = new ArrayList<>(); // generate the topic partitions and metadata first for (int i = 0; i < amountofloops; i++) { TopicPartition topicPartition = new TopicPartition("testConsumerTopic", i); - if (threadnum == 1) { + if (threadNum == 1) { if (((i + 1) % 2) == 0) { topicPartitions.add(topicPartition); beginningOffsets.put(topicPartition, 0L); @@ -273,7 +275,7 @@ public static Consumer getConsumer(int threadnum) { mockPartitionInfo.add(new PartitionInfo("testConsumerTopic", i, null, null, null)); } } - else if (threadnum == 2) { + else if (threadNum == 2) { if (((i + 1) % 2) != 0) { topicPartitions.add(topicPartition); beginningOffsets.put(topicPartition, 0L); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java index 6c4d67f7..53988665 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFile.java @@ -49,12 +49,12 @@ public interface PartitionFile { - void addRecord(KafkaRecordImpl kafkaRecord); + public abstract void addRecord(KafkaRecordImpl kafkaRecord); - void commitRecords() throws IOException; + public abstract void commitRecords() throws IOException; - void writeToHdfsEarly() throws IOException; + public abstract void writeToHdfsEarly() throws IOException; - void delete(); + public abstract void delete(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index c02042e2..b954d7de 100644 --- 
a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -78,7 +78,7 @@ public final class PartitionFileImpl implements PartitionFile { this.batchOffsets = new ArrayList<>(); this.partitionRecords = new PartitionRecordsImpl(config); try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { - // Initializes the syslogFile. + syslogAvroWriter.close(); } if (LOGGER.isDebugEnabled()) { LOGGER @@ -162,7 +162,7 @@ private void writeToHdfs(long offset) throws IOException { } syslogFile.delete(); // Delete the file as all the contents have been stored to HDFS. try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { - // NoOp, syslogAvroWriter has initialized the empty AVRO-file. + syslogAvroWriter.close(); } batchOffsets.clear(); if (LOGGER.isDebugEnabled()) { diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecords.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecords.java index 2b12e279..b42954ce 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecords.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecords.java @@ -51,8 +51,8 @@ public interface PartitionRecords { - void addRecord(KafkaRecordImpl kafkaRecord); + public abstract void addRecord(KafkaRecordImpl kafkaRecord); - List toSyslogRecordList(); + public abstract List toSyslogRecordList(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java index 7fa236f0..fb4ca254 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java @@ -78,7 +78,7 @@ public ReadCoordinator( private KafkaReader createKafkaReader( Properties readerKafkaProperties, String topic, - BatchDistributionImpl 
callbackFunction, + BatchDistributionImpl callbackFunctionInput, boolean useMockKafkaConsumer ) { @@ -87,28 +87,28 @@ private KafkaReader createKafkaReader( if (useMockKafkaConsumer) { // Mock kafka consumer is enabled, create mock consumers with assigned partitions that are not overlapping with each other. String name = Thread.currentThread().getName(); // Use thread name to identify which thread is running the code. if (Objects.equals(name, "testConsumerTopic1")) { - kafkaConsumer = MockKafkaConsumerFactory.getConsumer(1); // creates a Kafka MockConsumer that has the odd numbered partitions assigned to it. + kafkaConsumer = new MockKafkaConsumerFactory(1).getConsumer(); // creates a Kafka MockConsumer that has the odd numbered partitions assigned to it. consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( kafkaConsumer, - callbackFunction, + callbackFunctionInput, config ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } else if (Objects.equals(name, "testConsumerTopic2")) { - kafkaConsumer = MockKafkaConsumerFactory.getConsumer(2); // creates a Kafka MockConsumer that has the even numbered partitions assigned to it. + kafkaConsumer = new MockKafkaConsumerFactory(2).getConsumer(); // creates a Kafka MockConsumer that has the even numbered partitions assigned to it. consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( kafkaConsumer, - callbackFunction, + callbackFunctionInput, config ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } else { - kafkaConsumer = MockKafkaConsumerFactory.getConsumer(0); // Creates a single Kafka MockConsumer that has all the partitions assigned to it. + kafkaConsumer = new MockKafkaConsumerFactory(0).getConsumer(); // Creates a single Kafka MockConsumer that has all the partitions assigned to it. 
consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( kafkaConsumer, - callbackFunction, + callbackFunctionInput, config ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); @@ -120,7 +120,11 @@ else if (Objects.equals(name, "testConsumerTopic2")) { new ByteArrayDeserializer(), new ByteArrayDeserializer() ); - consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl(kafkaConsumer, callbackFunction, config); + consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( + kafkaConsumer, + callbackFunctionInput, + config + ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } @@ -135,7 +139,7 @@ else if (Objects.equals(name, "testConsumerTopic2")) { } } - return new KafkaReader(kafkaConsumer, callbackFunction, consumerRebalanceListenerImpl, config); + return new KafkaReader(kafkaConsumer, callbackFunctionInput, consumerRebalanceListenerImpl, config); } // Part or Runnable implementation, called when the thread is started. 
diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/UniqueFileCreated.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/UniqueFileCreated.java index 9f37fd01..ec4b0fce 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/UniqueFileCreated.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/queue/UniqueFileCreated.java @@ -109,9 +109,9 @@ public File getNextWritableFile() throws IOException { } } - private BiPredicate getFileMatcher(String queueNamePrefix) { + private BiPredicate getFileMatcher(String queueNamePrefixInput) { return (path, basicFileAttributes) -> { - if (!path.getFileName().toString().startsWith(queueNamePrefix)) { + if (!path.getFileName().toString().startsWith(queueNamePrefixInput)) { return false; } else if (path.getFileName().toString().endsWith(".state")) { diff --git a/src/main/java/com/teragrep/cfe_39/metrics/DurationStatistics.java b/src/main/java/com/teragrep/cfe_39/metrics/DurationStatistics.java index 15454eaf..e2e50111 100644 --- a/src/main/java/com/teragrep/cfe_39/metrics/DurationStatistics.java +++ b/src/main/java/com/teragrep/cfe_39/metrics/DurationStatistics.java @@ -55,8 +55,8 @@ public class DurationStatistics { - MetricRegistry metricRegistry = new MetricRegistry(); - private static final Logger LOGGER = LoggerFactory.getLogger(DurationStatistics.class); + private final MetricRegistry metricRegistry = new MetricRegistry(); + private final Logger LOGGER = LoggerFactory.getLogger(DurationStatistics.class); private Instant lastReportTime = Instant.now(); private long lastBytes = 0L; private long lastRecords = 0L; From 7a21cda46c45a51bf44f321a1d95ae7164f278c7 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 31 Oct 2024 10:50:56 +0200 Subject: [PATCH 60/77] Fixed logger message. 
--- .../cfe_39/consumers/kafka/BatchDistributionImpl.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index e8799bed..a981a859 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -97,8 +97,8 @@ public void accept(List batch) { if (LOGGER.isDebugEnabled()) { LOGGER .debug( - "Fuura searching your batch for <[{}]> with records <{}> and took <{}> milliseconds. <{}> EPS. ", - topic, batch.size(), (ftook), (batch.size() * 1000L / ftook) + "Searching batch for <[{}]> with <{}> records took <{}> milliseconds. <{}> EPS. ", topic, + batch.size(), (ftook), (batch.size() * 1000L / ftook) ); } long batchBytes = 0L; From ea61a29bad93426e7250ce88b762d064e452c450 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 6 Nov 2024 16:26:27 +0200 Subject: [PATCH 61/77] Added resource cleanup when exception is caught and rethrown. 
--- .../cfe_39/consumers/kafka/BatchDistributionImpl.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index a981a859..46838056 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -135,6 +135,10 @@ public void accept(List batch) { } catch (IOException e) { LOGGER.error("Failed to write the SyslogRecords to PartitionFileImpl <{}> in topic <{}>", key, topic); + // Cleanup resources + partitionFileMap.forEach((cleanupKey, cleanupValue) -> { + cleanupValue.delete(); + }); throw new RuntimeException(e); } }); From 0a31fdd9d23ea594de7b017dc8ffced08e344df5 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 6 Nov 2024 16:30:16 +0200 Subject: [PATCH 62/77] Removed redundant avro-file initializations from PartitionFileImpl.java. Refactored tests to check if initialized avro-file exists or not as expected. 
--- .../consumers/kafka/PartitionFileImpl.java | 10 ++------ .../cfe_39/BatchDistributionTest.java | 5 ---- .../cfe_39/Ingestion1Old1NewFileTest.java | 7 +----- .../cfe_39/Ingestion2NewFilesTest.java | 7 +----- .../cfe_39/Ingestion2OldFilesTest.java | 7 +----- .../cfe_39/ProcessingFailureTest.java | 24 ++----------------- 6 files changed, 7 insertions(+), 53 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index b954d7de..e5c05d88 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -77,13 +77,10 @@ public final class PartitionFileImpl implements PartitionFile { this.topicPartition = topicPartition; this.batchOffsets = new ArrayList<>(); this.partitionRecords = new PartitionRecordsImpl(config); - try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { - syslogAvroWriter.close(); - } if (LOGGER.isDebugEnabled()) { LOGGER .debug( - "PartitionFileImpl representing topic {} partition {} initialized successfully. syslogFile allocated to the object is located at {}", + "PartitionFileImpl representing topic {} partition {} initialized successfully. syslogFile path allocated to the object is {}", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), syslogFile.getPath() ); } @@ -161,14 +158,11 @@ private void writeToHdfs(long offset) throws IOException { writer.commit(syslogFile); // commits the final AVRO-file to HDFS. } syslogFile.delete(); // Delete the file as all the contents have been stored to HDFS. - try (SyslogAvroWriter syslogAvroWriter = new SyslogAvroWriter(syslogFile)) { - syslogAvroWriter.close(); - } batchOffsets.clear(); if (LOGGER.isDebugEnabled()) { LOGGER .debug( - "SyslogFile representing topic {} partition {} stored to HDFS with offset value of {}. 
SyslogFile allocated to the object is located at {}", + "SyslogFile representing topic {} partition {} stored to HDFS with offset value of {}. SyslogFile allocated to the object located at {} has been deleted to prepare for storing new records.", topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), offset, syslogFile.getPath() ); } diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index d1755308..be62b0b3 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -112,11 +112,6 @@ public void teardownMiniCluster() { }); hdfsCluster.shutdown(); FileUtil.fullyDelete(baseDir); - File queueDirectory = new File(config.valueOf("queueDirectory")); - File[] files = queueDirectory.listFiles(); - if (files[0].getName().equals("topicName0.1")) { - files[0].delete(); - } } @Test diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index 5d6099f6..3a343797 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -178,12 +178,7 @@ public void ingestion1Old1NewFileTest() { // Assert the avro-files that were too small to be stored in HDFS. String path1 = config.valueOf("queueDirectory") + "/" + "testConsumerTopic0.1"; File avroFile1 = new File(path1); - Assertions.assertTrue(avroFile1.exists()); - DatumReader datumReader1 = new SpecificDatumReader<>(SyslogRecord.class); - DataFileReader reader1 = new DataFileReader<>(avroFile1, datumReader1); - Assertions.assertFalse(reader1.hasNext()); - reader1.close(); - avroFile1.delete(); + Assertions.assertFalse(avroFile1.exists()); // Partition 0 avro-file shouldn't exist because there are no records left in the buffer. 
List filenameList = new ArrayList<>(); for (int i = 1; i <= 9; i++) { diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index 7749d83b..6b33b842 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -170,12 +170,7 @@ public void ingestion2NewFilesTest() { // Assert the avro-files that were too small to be stored in HDFS. String path1 = config.valueOf("queueDirectory") + "/" + "testConsumerTopic0.1"; File avroFile1 = new File(path1); - Assertions.assertTrue(avroFile1.exists()); - DatumReader datumReader1 = new SpecificDatumReader<>(SyslogRecord.class); - DataFileReader reader1 = new DataFileReader<>(avroFile1, datumReader1); - Assertions.assertFalse(reader1.hasNext()); // Partition 0 avro-file should be empty. - reader1.close(); - avroFile1.delete(); + Assertions.assertFalse(avroFile1.exists()); // Partition 0 avro-file shouldn't exist because there are no records left in the buffer. List filenameList = new ArrayList<>(); for (int partition = 1; partition <= 9; partition++) { diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index 598f7ae4..061197f1 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -174,12 +174,7 @@ public void ingestion2OldFilesTest() { // Assert the avro-files that were too small to be stored in HDFS. 
String path1 = config.valueOf("queueDirectory") + "/" + "testConsumerTopic0.1"; File avroFile1 = new File(path1); - Assertions.assertTrue(avroFile1.exists()); - DatumReader datumReader1 = new SpecificDatumReader<>(SyslogRecord.class); - DataFileReader reader1 = new DataFileReader<>(avroFile1, datumReader1); - Assertions.assertFalse(reader1.hasNext()); // Partition 0 avro-file should be empty. - reader1.close(); - avroFile1.delete(); + Assertions.assertFalse(avroFile1.exists()); // Partition 0 avro-file shouldn't exist because there are no records left in the buffer. List filenameList = new ArrayList<>(); for (int i = 1; i <= 9; i++) { diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index df4fbc03..0f7a51f0 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -45,15 +45,11 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.ConfigurationImpl; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; import com.teragrep.cfe_39.metrics.topic.TopicCounter; -import org.apache.avro.file.DataFileReader; -import org.apache.avro.io.DatumReader; -import org.apache.avro.specific.SpecificDatumReader; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -158,15 +154,7 @@ public void failNonRFC5424DatabaseOutputTest() { // Assert the local avro file that should e empty. 
File queueDirectory = new File(config.valueOf("queueDirectory")); File[] files = queueDirectory.listFiles(); - Assertions.assertEquals(1, files.length); - String path2 = config.valueOf("queueDirectory") + "/" + "topicName0.1"; - File avroFile = new File(path2); - Assertions.assertTrue(avroFile.exists()); - DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); - DataFileReader reader = new DataFileReader<>(avroFile, datumReader); - Assertions.assertFalse(reader.hasNext()); - reader.close(); - avroFile.delete(); + Assertions.assertEquals(0, files.length); // Partition 0 avro-file shouldn't exist because there are no records left in the buffer. }); } @@ -214,15 +202,7 @@ public void failNullRFC5424DatabaseOutputTest() { // Assert the local avro file that should e empty. File queueDirectory = new File(config.valueOf("queueDirectory")); File[] files = queueDirectory.listFiles(); - Assertions.assertEquals(1, files.length); - String path2 = config.valueOf("queueDirectory") + "/" + "topicName0.1"; - File avroFile = new File(path2); - Assertions.assertTrue(avroFile.exists()); - DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); - DataFileReader reader = new DataFileReader<>(avroFile, datumReader); - Assertions.assertFalse(reader.hasNext()); - reader.close(); - avroFile.delete(); + Assertions.assertEquals(0, files.length); // Partition 0 avro-file shouldn't exist because there are no records left in the buffer. }); } From b8fa90237260df5af740e3054b9f2f50c89e4e44 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 6 Nov 2024 16:41:47 +0200 Subject: [PATCH 63/77] Added consumer timeout test. Increased timeout parameter values in configuration files to prevent unintentional triggering of the consumer timeout. 
--- rpm/resources/application.properties | 2 +- .../cfe_39/IngestionConsumerTimeoutTest.java | 177 ++++++++++++++++++ .../resources/valid.application.properties | 2 +- 3 files changed, 179 insertions(+), 2 deletions(-) create mode 100644 src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java diff --git a/rpm/resources/application.properties b/rpm/resources/application.properties index 2d2bce5a..53de7a35 100644 --- a/rpm/resources/application.properties +++ b/rpm/resources/application.properties @@ -19,4 +19,4 @@ skipEmptyRFC5424Records=true # HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 157784760000L pruneOffset=157784760000 # timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS -consumerTimeout=300000 \ No newline at end of file +consumerTimeout=600000 \ No newline at end of file diff --git a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java new file mode 100644 index 00000000..49249be7 --- /dev/null +++ b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java @@ -0,0 +1,177 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39; + +import com.teragrep.cfe_39.avro.SyslogRecord; +import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; +import org.apache.avro.file.DataFileStream; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledIfSystemProperty; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.nio.file.Files; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +public class IngestionConsumerTimeoutTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(IngestionConsumerTimeoutTest.class); + private static MiniDFSCluster hdfsCluster; + private static File baseDir; + private static ConfigurationImpl config; + private FileSystem fs; + + // Prepares known state for testing. + @BeforeEach + public void startMiniCluster() { + assertDoesNotThrow(() -> { + // Set system properties to use the valid configuration. 
+ System + .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); + config = new ConfigurationImpl(); + config + .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + // Create a HDFS miniCluster + baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); + hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); + config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + config.with("maximumFileSize", "3000000"); + config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + config.with("hadoop.security.authentication", "false"); + config.with("consumerTimeout", "1000"); // Low consumerTimeout + fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + }); + } + + // Teardown the minicluster + @AfterEach + public void teardownMiniCluster() { + assertDoesNotThrow(() -> { + fs.close(); + }); + hdfsCluster.shutdown(); + FileUtil.fullyDelete(baseDir); + } + + @DisabledIfSystemProperty( + named = "skipIngestionTest", + matches = "true" + ) + @Test + public void ingestion0FilesTest() { + /*This test case is for testing the functionality of the consumerTimeout.*/ + assertDoesNotThrow(() -> { + Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. + Assertions.assertEquals(1000, Long.parseLong(config.valueOf("consumerTimeout"))); + Assertions.assertEquals(3000000, Long.parseLong(config.valueOf("maximumFileSize"))); + Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); + HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); + hdfsDataIngestion.run(); + }); + + // Assert that the kafka records were ingested correctly, HDFS should hold all the records even though maximumFileSize is set higher than expected file sizes. 
+ assertDoesNotThrow(() -> { + String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; + Path newDirectoryPath = new Path(path); + Assertions.assertTrue(fs.exists(newDirectoryPath)); + + FileStatus[] fileStatuses = fs.listStatus(newDirectoryPath); + Assertions.assertEquals(10, fileStatuses.length); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "1.13"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "2.13"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "3.13"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "4.13"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "5.13"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "6.13"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "7.13"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "8.13"))); + Assertions + .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "9.13"))); + + // Assert that the expected records are present in hdfs files + for (int i = 0; i <= 9; i++) { + Path hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + i + ".13"); + //Init input stream + FSDataInputStream inputStream = fs.open(hdfsreadpath); + //The data is in AVRO-format, so it can't be read as a string. 
+ DataFileStream reader = new DataFileStream<>( + inputStream, + new SpecificDatumReader<>(SyslogRecord.class) + ); + for (int j = 0; j <= 13; j++) { + Assertions.assertTrue(reader.hasNext()); + SyslogRecord syslogRecord = reader.next(); + Assertions.assertEquals(j, syslogRecord.getOffset()); + } + Assertions.assertFalse(reader.hasNext()); + } + + // Assert that all the temporary AVRO-files generated by PartitionFile objects during consumption were deleted to prepare for new records. + File queueDirectory = new File(config.valueOf("queueDirectory")); + File[] files = queueDirectory.listFiles(); + Assertions.assertEquals(0, files.length); + }); + } +} diff --git a/src/test/resources/valid.application.properties b/src/test/resources/valid.application.properties index 8f312cfa..e4009275 100644 --- a/src/test/resources/valid.application.properties +++ b/src/test/resources/valid.application.properties @@ -19,4 +19,4 @@ skipEmptyRFC5424Records=true # HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 157784760000L pruneOffset=157784760000 # timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS -consumerTimeout=300000 \ No newline at end of file +consumerTimeout=600000 \ No newline at end of file From 48cc3d2e5f59362accf0a49a869ba1f2e0214acf Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 13 Nov 2024 14:37:20 +0200 Subject: [PATCH 64/77] Added cnf_01 Teragrep configuration library dependency. 
--- pom.xml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pom.xml b/pom.xml index 44e62cfc..dc6f512d 100644 --- a/pom.xml +++ b/pom.xml @@ -8,6 +8,7 @@ cfe_39 -SNAPSHOT + 1.0.0 3.3.6 1.8 1.8 @@ -78,6 +79,12 @@ rlo_06 9.0.1 + + + com.teragrep + cnf_01 + ${cnf_01.version} + org.apache.kafka From 05fcb64c3385dd0ba02f5043b0cf4bce495bc088 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 13 Nov 2024 15:34:07 +0200 Subject: [PATCH 65/77] Added refactored NewCommonConfiguration.java and test that use cnf_01. --- .../configuration/NewCommonConfiguration.java | 205 ++++++++++++++++++ .../cfe_39/NewCommonConfigurationTest.java | 95 ++++++++ 2 files changed, 300 insertions(+) create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java create mode 100644 src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java diff --git a/src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java new file mode 100644 index 00000000..6b01a1ee --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java @@ -0,0 +1,205 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +import org.apache.logging.log4j.core.config.ConfigurationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +public final class NewCommonConfiguration { + + private final Logger LOGGER = LoggerFactory.getLogger(NewCommonConfiguration.class); + + private final Map config; + + public NewCommonConfiguration(Map map) { + this.config = map; + } + + // printers for configuration file paths. 
+ + public String egressConfigurationFile() { + return config + .getOrDefault("egress.configurationFile", System.getProperty("user.dir") + "/rpm/resources/egress.properties"); + } + + public String ingressConfigurationFile() { + return config + .getOrDefault("ingress.configurationFile", System.getProperty("user.dir") + "/rpm/resources/ingress.properties"); + } + + public String log4j2ConfigurationFile() { + return config + .getOrDefault("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + } + + // printers for the configuration parameters. + + public String queueTopicPattern() { + return config.getOrDefault("queueTopicPattern", ".*"); + } + + public int numOfConsumers() { + final String numString = config.get("numOfConsumers"); + if (numString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + final int numOfConsumers; + try { + numOfConsumers = Integer.parseInt(numString); + } + catch (NumberFormatException e) { + LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); + throw new RuntimeException(e); + } + if (numOfConsumers <= 0) { + throw new ConfigurationException("Configuration error. must be a positive integer."); + } + else { + return numOfConsumers; + } + } + } + + public String queueDirectory() { + return config.getOrDefault("queueDirectory", System.getProperty("user.dir") + "/rpm/resources/queue"); + } + + public long maximumFileSize() { + final String numString = config.get("maximumFileSize"); + if (numString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + final long maximumFileSize; + try { + maximumFileSize = Long.parseLong(numString); + } + catch (NumberFormatException e) { + LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); + throw new RuntimeException(e); + } + if (maximumFileSize <= 0) { + throw new ConfigurationException( + "Configuration error. 
must be a positive long value." + ); + } + else { + return maximumFileSize; + } + } + } + + public boolean skipNonRFC5424Records() { + final String skipString = config.get("skipNonRFC5424Records"); + if (skipString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return Boolean.parseBoolean(skipString); + } + } + + public boolean skipEmptyRFC5424Records() { + final String skipString = config.get("skipEmptyRFC5424Records"); + if (skipString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return Boolean.parseBoolean(skipString); + } + } + + public long pruneOffset() { + final String pruneString = config.get("pruneOffset"); + if (pruneString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + final long pruneOffset; + try { + pruneOffset = Long.parseLong(pruneString); + } + catch (NumberFormatException e) { + throw new RuntimeException(e); + } + if (pruneOffset <= 0) { + throw new ConfigurationException("Configuration error. must be a positive long value."); + } + else { + return pruneOffset; + } + } + } + + public long consumerTimeout() { + final String consumerTimeoutString = config.get("consumerTimeout"); + if (consumerTimeoutString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + final long consumerTimeout; + try { + consumerTimeout = Long.parseLong(consumerTimeoutString); + } + catch (NumberFormatException e) { + throw new RuntimeException(e); + } + if (consumerTimeout <= 0) { + throw new ConfigurationException( + "Configuration error. must be a positive long value." 
+ ); + } + else { + return consumerTimeout; + } + } + } + +} diff --git a/src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java new file mode 100644 index 00000000..e89ac230 --- /dev/null +++ b/src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java @@ -0,0 +1,95 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. 
+ * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39; + +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cnf_01.PathConfiguration; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +public class NewCommonConfigurationTest { + + private final Logger LOGGER = LoggerFactory.getLogger(NewCommonConfigurationTest.class); + + @Test + public void configurationTest() { + assertDoesNotThrow(() -> { + final PathConfiguration pathConfiguration = new PathConfiguration( + System.getProperty("user.dir") + "/src/test/resources/valid.application.properties" + ); + final Map map; + map = pathConfiguration.asMap(); + Assertions + .assertEquals( + "{pruneOffset=157784760000, numOfConsumers=2, queueTopicPattern=^testConsumerTopic-*$, queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/, skipNonRFC5424Records=true, maximumFileSize=3000, skipEmptyRFC5424Records=true, consumerTimeout=600000}", + map.toString() + ); + NewCommonConfiguration commonConfig = new NewCommonConfiguration(map); + + // Assert that printers return correct values. 
+ Assertions + .assertEquals(System.getProperty("user.dir") + "/rpm/resources/egress.properties", commonConfig.egressConfigurationFile()); + Assertions + .assertEquals(System.getProperty("user.dir") + "/rpm/resources/ingress.properties", commonConfig.ingressConfigurationFile()); + Assertions + .assertEquals(System.getProperty("user.dir") + "/rpm/resources/log4j2.properties", commonConfig.log4j2ConfigurationFile()); + Assertions.assertEquals(2, commonConfig.numOfConsumers()); + Assertions.assertEquals(3000, commonConfig.maximumFileSize()); + Assertions.assertEquals(600000, commonConfig.consumerTimeout()); + Assertions.assertEquals(157784760000L, commonConfig.pruneOffset()); + Assertions.assertTrue(commonConfig.skipNonRFC5424Records()); + Assertions.assertTrue(commonConfig.skipEmptyRFC5424Records()); + Assertions.assertEquals("/opt/teragrep/cfe_39/etc/AVRO/", commonConfig.queueDirectory()); + Assertions.assertEquals("^testConsumerTopic-*$", commonConfig.queueTopicPattern()); + }); + } +} From 8ddac46626a76102cb23e23441280b6bd09c1f2d Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Thu, 14 Nov 2024 14:54:33 +0200 Subject: [PATCH 66/77] Added refactored NewKafkaConfiguration.java, NewHdfsConfiguration.java and tests that use cnf_01. 
--- .../configuration/NewHdfsConfiguration.java | 226 +++++++++++++++++ .../configuration/NewKafkaConfiguration.java | 238 ++++++++++++++++++ .../cfe_39/NewHdfsConfigurationTest.java | 95 +++++++ .../cfe_39/NewKafkaConfigurationTest.java | 94 +++++++ 4 files changed, 653 insertions(+) create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/NewHdfsConfiguration.java create mode 100644 src/main/java/com/teragrep/cfe_39/configuration/NewKafkaConfiguration.java create mode 100644 src/test/java/com/teragrep/cfe_39/NewHdfsConfigurationTest.java create mode 100644 src/test/java/com/teragrep/cfe_39/NewKafkaConfigurationTest.java diff --git a/src/main/java/com/teragrep/cfe_39/configuration/NewHdfsConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/NewHdfsConfiguration.java new file mode 100644 index 00000000..a522df18 --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/NewHdfsConfiguration.java @@ -0,0 +1,226 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +import org.apache.logging.log4j.core.config.ConfigurationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +public final class NewHdfsConfiguration { + + private final Logger LOGGER = LoggerFactory.getLogger(NewHdfsConfiguration.class); + + private final Map config; + + public NewHdfsConfiguration(Map config) { + this.config = config; + } + + // printers for the configuration parameters. + + public long pruneOffset() { + final String numString = config.get("pruneOffset"); + if (numString == null) { + throw new ConfigurationException("Configuration error. 
must be set."); + } + else { + final long pruneOffset; + try { + pruneOffset = Long.parseLong(numString); + } + catch (NumberFormatException e) { + LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); + throw new RuntimeException(e); + } + if (pruneOffset <= 0) { + throw new ConfigurationException("Configuration error. must be a positive integer."); + } + else { + return pruneOffset; + } + } + } + + public String hdfsUri() { + final String hdfsUri = config.get("hdfsuri"); + if (hdfsUri == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return hdfsUri; + } + } + + public String hdfsPath() { + final String hdfsPath = config.get("hdfsPath"); + if (hdfsPath == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return hdfsPath; + } + } + + public String javaSecurityKrb5Kdc() { + final String javaSecurityKrb5Kdc = config.get("java.security.krb5.kdc"); + if (javaSecurityKrb5Kdc == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return javaSecurityKrb5Kdc; + } + } + + public String javaSecurityKrb5Realm() { + final String javaSecurityKrb5Realm = config.get("java.security.krb5.realm"); + if (javaSecurityKrb5Realm == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return javaSecurityKrb5Realm; + } + } + + public String hadoopSecurityAuthentication() { + final String hadoopSecurityAuthentication = config.get("hadoop.security.authentication"); + if (hadoopSecurityAuthentication == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return hadoopSecurityAuthentication; + } + } + + public String hadoopSecurityAuthorization() { + final String hadoopSecurityAuthorization = config.get("hadoop.security.authorization"); + if (hadoopSecurityAuthorization == null) { + throw new ConfigurationException("Configuration error. 
must be set."); + } + else { + return hadoopSecurityAuthorization; + } + } + + public String dfsNamenodeKerberosPrincipalPattern() { + final String dfsNamenodeKerberosPrincipalPattern = config.get("dfs.namenode.kerberos.principal.pattern"); + if (dfsNamenodeKerberosPrincipalPattern == null) { + throw new ConfigurationException( + "Configuration error. must be set." + ); + } + else { + return dfsNamenodeKerberosPrincipalPattern; + } + } + + public String KerberosKeytabUser() { + final String KerberosKeytabUser = config.get("KerberosKeytabUser"); + if (KerberosKeytabUser == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return KerberosKeytabUser; + } + } + + public String KerberosKeytabPath() { + final String KerberosKeytabPath = config.get("KerberosKeytabPath"); + if (KerberosKeytabPath == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return KerberosKeytabPath; + } + } + + public String dfsClientUseDatanodeHostname() { + final String dfsClientUseDatanodeHostname = config.get("dfs.client.use.datanode.hostname"); + if (dfsClientUseDatanodeHostname == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return dfsClientUseDatanodeHostname; + } + } + + public String hadoopKerberosKeytabLoginAutorenewalEnabled() { + final String hadoopKerberosKeytabLoginAutorenewalEnabled = config + .get("hadoop.kerberos.keytab.login.autorenewal.enabled"); + if (hadoopKerberosKeytabLoginAutorenewalEnabled == null) { + throw new ConfigurationException( + "Configuration error. must be set." + ); + } + else { + return hadoopKerberosKeytabLoginAutorenewalEnabled; + } + } + + public String dfsDataTransferProtection() { + final String dfsDataTransferProtection = config.get("dfs.data.transfer.protection"); + if (dfsDataTransferProtection == null) { + throw new ConfigurationException("Configuration error. 
must be set."); + } + else { + return dfsDataTransferProtection; + } + } + + public String dfsEncryptDataTransferCipherSuites() { + final String dfsEncryptDataTransferCipherSuites = config.get("dfs.encrypt.data.transfer.cipher.suites"); + if (dfsEncryptDataTransferCipherSuites == null) { + throw new ConfigurationException( + "Configuration error. must be set." + ); + } + else { + return dfsEncryptDataTransferCipherSuites; + } + } + +} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/NewKafkaConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/NewKafkaConfiguration.java new file mode 100644 index 00000000..992da3cb --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/configuration/NewKafkaConfiguration.java @@ -0,0 +1,238 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. 
+ * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39.configuration; + +import org.apache.logging.log4j.core.config.ConfigurationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +public final class NewKafkaConfiguration { + + private final Logger LOGGER = LoggerFactory.getLogger(NewKafkaConfiguration.class); + + private final Map config; + + public NewKafkaConfiguration(Map config) { + this.config = config; + } + + // printers for the configuration parameters. + + public String javaSecurityAuthLoginConfig() { + final String javaSecurityAuthLoginConfig = config.get("java.security.auth.login.config"); + if (javaSecurityAuthLoginConfig == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return javaSecurityAuthLoginConfig; + } + } + + public String bootstrapServers() { + final String bootstrapServers = config.get("bootstrap.servers"); + if (bootstrapServers == null) { + throw new ConfigurationException("Configuration error. 
must be set."); + } + else { + return bootstrapServers; + } + } + + public String autoOffsetReset() { + final String autoOffsetReset = config.get("auto.offset.reset"); + if (autoOffsetReset == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return autoOffsetReset; + } + } + + public String enableAutoCommit() { + final String enableAutoCommit = config.get("enable.auto.commit"); + if (enableAutoCommit == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return enableAutoCommit; + } + } + + public String groupId() { + final String groupId = config.get("group.id"); + if (groupId == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return groupId; + } + } + + public String securityProtocol() { + final String securityProtocol = config.get("security.protocol"); + if (securityProtocol == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return securityProtocol; + } + } + + public String saslMechanism() { + final String saslMechanism = config.get("sasl.mechanism"); + if (saslMechanism == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return saslMechanism; + } + } + + public long maxPollRecords() { + final String numString = config.get("max.poll.records"); + if (numString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + final long maxPollRecords; + try { + maxPollRecords = Long.parseLong(numString); + } + catch (NumberFormatException e) { + LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); + throw new RuntimeException(e); + } + if (maxPollRecords < 0) { + throw new ConfigurationException("Configuration error. 
must be a positive value."); + } + else { + return maxPollRecords; + } + } + } + + public long fetchMaxBytes() { + final String numString = config.get("fetch.max.bytes"); + if (numString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + final long fetchMaxBytes; + try { + fetchMaxBytes = Long.parseLong(numString); + } + catch (NumberFormatException e) { + LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); + throw new RuntimeException(e); + } + if (fetchMaxBytes < 0) { + throw new ConfigurationException("Configuration error. must be a positive value."); + } + else { + return fetchMaxBytes; + } + } + } + + public long requestTimeoutMs() { + final String numString = config.get("request.timeout.ms"); + if (numString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + final long requestTimeoutMs; + try { + requestTimeoutMs = Long.parseLong(numString); + } + catch (NumberFormatException e) { + LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); + throw new RuntimeException(e); + } + if (requestTimeoutMs < 0) { + throw new ConfigurationException("Configuration error. must be a positive value."); + } + else { + return requestTimeoutMs; + } + } + } + + public long maxPollIntervalMs() { + final String numString = config.get("max.poll.interval.ms"); + if (numString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + final long maxPollIntervalMs; + try { + maxPollIntervalMs = Long.parseLong(numString); + } + catch (NumberFormatException e) { + LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); + throw new RuntimeException(e); + } + if (maxPollIntervalMs < 0) { + throw new ConfigurationException("Configuration error. 
must be a positive value."); + } + else { + return maxPollIntervalMs; + } + } + } + + public boolean useMockKafkaConsumer() { + final String useMockKafkaConsumer = config.get("useMockKafkaConsumer"); + if (useMockKafkaConsumer == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + return Boolean.parseBoolean(useMockKafkaConsumer); + } + } + +} diff --git a/src/test/java/com/teragrep/cfe_39/NewHdfsConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/NewHdfsConfigurationTest.java new file mode 100644 index 00000000..223ccfbd --- /dev/null +++ b/src/test/java/com/teragrep/cfe_39/NewHdfsConfigurationTest.java @@ -0,0 +1,95 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. 
+ * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.cfe_39; + +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cnf_01.PathConfiguration; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +public class NewHdfsConfigurationTest { + + private final Logger LOGGER = LoggerFactory.getLogger(NewHdfsConfigurationTest.class); + + @Test + public void configurationTest() { + assertDoesNotThrow(() -> { + final PathConfiguration hdfsPathConfiguration = new PathConfiguration( + System.getProperty("user.dir") + "/src/test/resources/valid.hdfs.properties" + ); + final Map hdfsMap; + hdfsMap = hdfsPathConfiguration.asMap(); + Assertions + .assertEquals( + "{pruneOffset=157784760000, hdfsuri=hdfs://localhost:45937/, dfs.namenode.kerberos.principal.pattern=test, hadoop.security.authentication=kerberos, dfs.encrypt.data.transfer.cipher.suites=test, java.security.krb5.kdc=test, KerberosKeytabPath=test, dfs.data.transfer.protection=test, dfs.client.use.datanode.hostname=false, 
hadoop.kerberos.keytab.login.autorenewal.enabled=true, KerberosKeytabUser=test, java.security.krb5.realm=test, hadoop.security.authorization=test, hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/}", + hdfsMap.toString() + ); + NewHdfsConfiguration hdfsConfig = new NewHdfsConfiguration(hdfsMap); + + // Assert that printers return correct values. + Assertions.assertEquals(157784760000L, hdfsConfig.pruneOffset()); + Assertions.assertEquals("hdfs://localhost:45937/", hdfsConfig.hdfsUri()); + Assertions.assertEquals("hdfs:///opt/teragrep/cfe_39/srv/", hdfsConfig.hdfsPath()); + Assertions.assertEquals("test", hdfsConfig.javaSecurityKrb5Kdc()); + Assertions.assertEquals("test", hdfsConfig.javaSecurityKrb5Realm()); + Assertions.assertEquals("kerberos", hdfsConfig.hadoopSecurityAuthentication()); + Assertions.assertEquals("test", hdfsConfig.hadoopSecurityAuthorization()); + Assertions.assertEquals("test", hdfsConfig.dfsNamenodeKerberosPrincipalPattern()); + Assertions.assertEquals("test", hdfsConfig.KerberosKeytabUser()); + Assertions.assertEquals("test", hdfsConfig.KerberosKeytabPath()); + Assertions.assertEquals("false", hdfsConfig.dfsClientUseDatanodeHostname()); + Assertions.assertEquals("true", hdfsConfig.hadoopKerberosKeytabLoginAutorenewalEnabled()); + Assertions.assertEquals("test", hdfsConfig.dfsDataTransferProtection()); + Assertions.assertEquals("test", hdfsConfig.dfsEncryptDataTransferCipherSuites()); + }); + } +} diff --git a/src/test/java/com/teragrep/cfe_39/NewKafkaConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/NewKafkaConfigurationTest.java new file mode 100644 index 00000000..a1c09fcd --- /dev/null +++ b/src/test/java/com/teragrep/cfe_39/NewKafkaConfigurationTest.java @@ -0,0 +1,94 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free 
Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39; + +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cnf_01.PathConfiguration; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +public class NewKafkaConfigurationTest { + + private final Logger LOGGER = LoggerFactory.getLogger(NewKafkaConfigurationTest.class); + + @Test + public void configurationTest() { + assertDoesNotThrow(() -> { + final PathConfiguration kafkaPathConfiguration = new PathConfiguration( + System.getProperty("user.dir") + "/src/test/resources/valid.kafka.properties" + ); + final Map kafkaMap; + kafkaMap = kafkaPathConfiguration.asMap(); + Assertions + .assertEquals( + "{java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas, security.protocol=SASL_PLAINTEXT, useMockKafkaConsumer=true, enable.auto.commit=false, max.poll.records=500, request.timeout.ms=300000, sasl.mechanism=PLAIN, group.id=cfe_39, bootstrap.servers=test, fetch.max.bytes=1073741820, max.poll.interval.ms=300000, auto.offset.reset=earliest}", + kafkaMap.toString() + ); + NewKafkaConfiguration kafkaConfig = new NewKafkaConfiguration(kafkaMap); + + // Assert that printers return correct values. 
+ Assertions.assertEquals("/opt/teragrep/cfe_39/etc/config.jaas", kafkaConfig.javaSecurityAuthLoginConfig()); + Assertions.assertEquals("test", kafkaConfig.bootstrapServers()); + Assertions.assertEquals("earliest", kafkaConfig.autoOffsetReset()); + Assertions.assertEquals("false", kafkaConfig.enableAutoCommit()); + Assertions.assertEquals("cfe_39", kafkaConfig.groupId()); + Assertions.assertEquals("SASL_PLAINTEXT", kafkaConfig.securityProtocol()); + Assertions.assertEquals("PLAIN", kafkaConfig.saslMechanism()); + Assertions.assertEquals(500, kafkaConfig.maxPollRecords()); + Assertions.assertEquals(1073741820, kafkaConfig.fetchMaxBytes()); + Assertions.assertEquals(300000, kafkaConfig.requestTimeoutMs()); + Assertions.assertEquals(300000, kafkaConfig.maxPollIntervalMs()); + Assertions.assertTrue(kafkaConfig.useMockKafkaConsumer()); + + }); + } +} From bf496f4f345da65106404471f0217032e3bbb57b Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 15 Nov 2024 13:56:43 +0200 Subject: [PATCH 67/77] Moved pruneOffset configuration parameter from application.properties to egress.properties. Disabled ConfigurationTest.java to prepare for refactoring. --- rpm/resources/application.properties | 2 -- rpm/resources/egress.properties | 2 ++ .../configuration/NewCommonConfiguration.java | 22 ------------------- .../teragrep/cfe_39/ConfigurationTest.java | 7 ++++++ .../cfe_39/NewCommonConfigurationTest.java | 3 +-- .../resources/valid.application.properties | 2 -- 6 files changed, 10 insertions(+), 28 deletions(-) diff --git a/rpm/resources/application.properties b/rpm/resources/application.properties index 53de7a35..9db46c63 100644 --- a/rpm/resources/application.properties +++ b/rpm/resources/application.properties @@ -16,7 +16,5 @@ maximumFileSize=3000 skipNonRFC5424Records=true # Boolean for deciding if empty RFC5424 records should be skipped or not. 
skipEmptyRFC5424Records=true -# HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 157784760000L -pruneOffset=157784760000 # timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS consumerTimeout=600000 \ No newline at end of file diff --git a/rpm/resources/egress.properties b/rpm/resources/egress.properties index dbc5adf7..864b019a 100644 --- a/rpm/resources/egress.properties +++ b/rpm/resources/egress.properties @@ -1,3 +1,5 @@ +# HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 157784760000L +pruneOffset=157784760000 # HDFS uri hdfsuri=hdfs://localhost:45937/ # HDFS path diff --git a/src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java index 6b01a1ee..eace92f6 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java @@ -156,28 +156,6 @@ public boolean skipEmptyRFC5424Records() { } } - public long pruneOffset() { - final String pruneString = config.get("pruneOffset"); - if (pruneString == null) { - throw new ConfigurationException("Configuration error. must be set."); - } - else { - final long pruneOffset; - try { - pruneOffset = Long.parseLong(pruneString); - } - catch (NumberFormatException e) { - throw new RuntimeException(e); - } - if (pruneOffset <= 0) { - throw new ConfigurationException("Configuration error. 
must be a positive long value."); - } - else { - return pruneOffset; - } - } - } - public long consumerTimeout() { final String consumerTimeoutString = config.get("consumerTimeout"); if (consumerTimeoutString == null) { diff --git a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java index deaf85d7..06d6f42c 100644 --- a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java @@ -47,6 +47,7 @@ import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,6 +58,7 @@ public class ConfigurationTest { private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationTest.class); + @Disabled(value = "Preparing configuration refactoring") @Test public void kafkaPropertiesConfigurationTest() { assertDoesNotThrow(() -> { @@ -72,6 +74,7 @@ public void kafkaPropertiesConfigurationTest() { }); } + @Disabled(value = "Preparing configuration refactoring") @Test public void brokenConfigurationTest() { // Set system properties to use the broken configuration. @@ -84,6 +87,7 @@ public void brokenConfigurationTest() { Assertions.assertEquals("Missing required key numOfConsumers", e.getMessage()); } + @Disabled(value = "Preparing configuration refactoring") @Test public void configurationEqualityTest() { // Set system properties to use the valid configuration. @@ -108,6 +112,7 @@ public void configurationEqualityTest() { }); } + @Disabled(value = "Preparing configuration refactoring") @Test public void configurationWithTest() { // Set system properties to use the valid configuration. 
@@ -126,6 +131,7 @@ public void configurationWithTest() { }); } + @Disabled(value = "Preparing configuration refactoring") @Test public void configurationWithFailTest() { // Set system properties to use the valid configuration. @@ -140,6 +146,7 @@ public void configurationWithFailTest() { Assertions.assertEquals("Key not found: unauthorized_key", e.getMessage()); } + @Disabled(value = "Preparing configuration refactoring") @Test public void configurationWithFailTest2() { // Set system properties to use the valid configuration. diff --git a/src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java index e89ac230..9d6488c6 100644 --- a/src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java @@ -70,7 +70,7 @@ public void configurationTest() { map = pathConfiguration.asMap(); Assertions .assertEquals( - "{pruneOffset=157784760000, numOfConsumers=2, queueTopicPattern=^testConsumerTopic-*$, queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/, skipNonRFC5424Records=true, maximumFileSize=3000, skipEmptyRFC5424Records=true, consumerTimeout=600000}", + "{numOfConsumers=2, queueTopicPattern=^testConsumerTopic-*$, queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/, skipNonRFC5424Records=true, maximumFileSize=3000, skipEmptyRFC5424Records=true, consumerTimeout=600000}", map.toString() ); NewCommonConfiguration commonConfig = new NewCommonConfiguration(map); @@ -85,7 +85,6 @@ public void configurationTest() { Assertions.assertEquals(2, commonConfig.numOfConsumers()); Assertions.assertEquals(3000, commonConfig.maximumFileSize()); Assertions.assertEquals(600000, commonConfig.consumerTimeout()); - Assertions.assertEquals(157784760000L, commonConfig.pruneOffset()); Assertions.assertTrue(commonConfig.skipNonRFC5424Records()); Assertions.assertTrue(commonConfig.skipEmptyRFC5424Records()); 
Assertions.assertEquals("/opt/teragrep/cfe_39/etc/AVRO/", commonConfig.queueDirectory()); diff --git a/src/test/resources/valid.application.properties b/src/test/resources/valid.application.properties index e4009275..acac899b 100644 --- a/src/test/resources/valid.application.properties +++ b/src/test/resources/valid.application.properties @@ -16,7 +16,5 @@ maximumFileSize=3000 skipNonRFC5424Records=true # Boolean for deciding if empty RFC5424 records should be skipped or not. skipEmptyRFC5424Records=true -# HDFS pruning, use 157784760000 value while testing HDFS writes to ensure the test records are not pruned. 157784760000L -pruneOffset=157784760000 # timeout modifier for when the consumer's cache of intermediate results are flushed to HDFS consumerTimeout=600000 \ No newline at end of file From 37d2a704b1a36fe6aa36a7c8c415b9f0915c460c Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 15 Nov 2024 14:02:18 +0200 Subject: [PATCH 68/77] Refactored code to use the refactored configuration classes and cnf_01. 
--- src/main/java/com/teragrep/cfe_39/Main.java | 61 ++++++++-- .../kafka/BatchDistributionImpl.java | 12 +- .../kafka/ConsumerRebalanceListenerImpl.java | 6 +- .../kafka/FileSystemFactoryImpl.java | 40 +++---- .../cfe_39/consumers/kafka/HDFSPrune.java | 8 +- .../cfe_39/consumers/kafka/HDFSRead.java | 6 +- .../cfe_39/consumers/kafka/HDFSWrite.java | 8 +- .../consumers/kafka/HdfsDataIngestion.java | 63 +++++++---- .../cfe_39/consumers/kafka/KafkaReader.java | 8 +- .../consumers/kafka/PartitionFileImpl.java | 16 ++- .../consumers/kafka/PartitionRecordsImpl.java | 10 +- .../consumers/kafka/ReadCoordinator.java | 45 ++++---- .../cfe_39/BatchDistributionTest.java | 86 +++++++++----- .../java/com/teragrep/cfe_39/HdfsTest.java | 93 +++++++++------ .../cfe_39/Ingestion0FilesLowSizeTest.java | 103 +++++++++++------ .../teragrep/cfe_39/Ingestion0FilesTest.java | 79 +++++++++---- .../cfe_39/Ingestion1Old1NewFileTest.java | 100 +++++++++++------ .../cfe_39/Ingestion2NewFilesTest.java | 98 ++++++++++------ .../cfe_39/Ingestion2OldFilesTest.java | 96 +++++++++++----- .../cfe_39/IngestionConsumerTimeoutTest.java | 106 ++++++++++++------ .../teragrep/cfe_39/KafkaConsumerTest.java | 80 ++++++++++--- .../cfe_39/ProcessingFailureTest.java | 69 ++++++++---- .../teragrep/cfe_39/PruningNoFilesTest.java | 45 +++++--- .../cfe_39/PruningOneNewFileTest.java | 56 ++++----- .../cfe_39/PruningOneOldFileTest.java | 55 +++++---- .../cfe_39/PruningOneOldOneNewFileTest.java | 60 +++++----- .../cfe_39/PruningTwoNewFilesTest.java | 55 +++++---- .../cfe_39/PruningTwoOldFilesTest.java | 59 +++++----- .../teragrep/cfe_39/SyslogAvroWriterTest.java | 33 +++--- .../cfe_39/TestMiniClusterFactory.java | 3 +- 30 files changed, 1010 insertions(+), 549 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/Main.java b/src/main/java/com/teragrep/cfe_39/Main.java index 244242af..7ec77aca 100644 --- a/src/main/java/com/teragrep/cfe_39/Main.java +++ b/src/main/java/com/teragrep/cfe_39/Main.java @@ -45,33 
+45,70 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; +import com.teragrep.cnf_01.ConfigurationException; +import com.teragrep.cnf_01.PathConfiguration; +import org.apache.logging.log4j.core.config.Configurator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Map; public final class Main { private static final Logger LOGGER = LoggerFactory.getLogger(Main.class); public static void main(String[] args) throws Exception { - ConfigurationImpl config = new ConfigurationImpl(); + // NewCommonConfiguration + final PathConfiguration pathConfiguration = new PathConfiguration( + System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties") + ); + final Map map; try { - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + map = pathConfiguration.asMap(); } - catch (IOException e) { - LOGGER.error("Can't load config: ", e); - System.exit(1); + catch (ConfigurationException e) { + LOGGER.error("Failed to create PathConfiguration: <{}>", e.getMessage()); + throw e; } - catch (IllegalArgumentException e) { - LOGGER.error("Got invalid config: ", e); - System.exit(1); + NewCommonConfiguration commonConfig = new NewCommonConfiguration(map); + + // log4j2 configuration + Path log4j2Config = Paths + .get(commonConfig.log4j2ConfigurationFile(), System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); + Configurator.reconfigure(log4j2Config.toUri()); + + // NewKafkaConfiguration + final PathConfiguration kafkaPathConfiguration = new 
PathConfiguration(commonConfig.egressConfigurationFile()); + final Map kafkaMap; + try { + kafkaMap = kafkaPathConfiguration.asMap(); } + catch (ConfigurationException e) { + LOGGER.error("Failed to create PathConfiguration: <{}>", e.getMessage()); + throw e; + } + NewKafkaConfiguration kafkaConfig = new NewKafkaConfiguration(kafkaMap); + + // NewHdfsConfiguration + final PathConfiguration hdfsPathConfiguration = new PathConfiguration(commonConfig.ingressConfigurationFile()); + final Map hdfsMap; + try { + hdfsMap = hdfsPathConfiguration.asMap(); + } + catch (ConfigurationException e) { + LOGGER.error("Failed to create PathConfiguration: <{}>", e.getMessage()); + throw e; + } + NewHdfsConfiguration hdfsConfig = new NewHdfsConfiguration(hdfsMap); + LOGGER.info("Running main program"); - HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); + HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(commonConfig, hdfsConfig, kafkaConfig); hdfsDataIngestion.run(); } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index 46838056..9c8ea094 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -46,7 +46,8 @@ package com.teragrep.cfe_39.consumers.kafka; import com.google.gson.*; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import com.teragrep.cfe_39.metrics.DurationStatistics; import org.slf4j.Logger; @@ -69,16 +70,19 @@ public final class BatchDistributionImpl implements BatchDistribution { private final DurationStatistics durationStatistics; private final TopicCounter topicCounter; private long lastTimeCalled; - 
private final ConfigurationImpl config; + private final NewCommonConfiguration config; + private final NewHdfsConfiguration hdfsConfig; private final Map partitionFileMap; public BatchDistributionImpl( - ConfigurationImpl config, + NewCommonConfiguration config, + NewHdfsConfiguration hdfsConfig, String topic, DurationStatistics durationStatistics, TopicCounter topicCounter ) { this.config = config; + this.hdfsConfig = hdfsConfig; this.topic = topic; this.durationStatistics = durationStatistics; this.topicCounter = topicCounter; @@ -114,7 +118,7 @@ public void accept(List batch) { if (!partitionFileMap.containsKey(recordOffset.get("partition").getAsString())) { try { partitionFileMap - .put(recordOffset.get("partition").getAsString(), new PartitionFileImpl(config, recordOffset)); + .put(recordOffset.get("partition").getAsString(), new PartitionFileImpl(config, hdfsConfig, recordOffset)); } catch (IOException e) { LOGGER.error("Failed to create new PartitionFileImpl for record <{}>", recordOffset); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java index 4108091e..5fb29962 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import org.apache.hadoop.fs.FileSystem; import org.apache.kafka.clients.consumer.Consumer; import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; @@ -64,12 +64,12 @@ public final class ConsumerRebalanceListenerImpl implements ConsumerRebalanceLis private final Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; - private final ConfigurationImpl config; + 
private final NewHdfsConfiguration config; public ConsumerRebalanceListenerImpl( Consumer kafkaConsumer, BatchDistributionImpl callbackFunction, - ConfigurationImpl config + NewHdfsConfiguration config ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java index 65ededaf..d6eaaf62 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -58,61 +58,53 @@ public final class FileSystemFactoryImpl implements FileSystemFactory { private final HdfsConfiguration conf; - private final ConfigurationImpl configuration; + private final NewHdfsConfiguration configuration; - public FileSystemFactoryImpl(ConfigurationImpl configuration) { + public FileSystemFactoryImpl(NewHdfsConfiguration configuration) { this.conf = new HdfsConfiguration(); this.configuration = configuration; } public FileSystem create(boolean initializeUGI) throws IOException { FileSystem fs; - if ("kerberos".equals(configuration.valueOf("hadoop.security.authentication"))) { + if ("kerberos".equals(configuration.hadoopSecurityAuthentication())) { // Initializing the FileSystem with kerberos. - String hdfsuri = configuration.valueOf("hdfsuri"); // Get from config. + String hdfsuri = configuration.hdfsUri(); // Get from config. 
// set kerberos host and realm - System.setProperty("java.security.krb5.realm", configuration.valueOf("java.security.krb5.realm")); - System.setProperty("java.security.krb5.kdc", configuration.valueOf("java.security.krb5.kdc")); + System.setProperty("java.security.krb5.realm", configuration.javaSecurityKrb5Realm()); + System.setProperty("java.security.krb5.kdc", configuration.javaSecurityKrb5Kdc()); conf.clear(); // enable kerberus - conf.set("hadoop.security.authentication", configuration.valueOf("hadoop.security.authentication")); - conf.set("hadoop.security.authorization", configuration.valueOf("hadoop.security.authorization")); + conf.set("hadoop.security.authentication", configuration.hadoopSecurityAuthentication()); + conf.set("hadoop.security.authorization", configuration.hadoopSecurityAuthorization()); conf .set( "hadoop.kerberos.keytab.login.autorenewal.enabled", - configuration.valueOf("hadoop.kerberos.keytab.login.autorenewal.enabled") + configuration.hadoopKerberosKeytabLoginAutorenewalEnabled() ); conf.set("fs.defaultFS", hdfsuri); // Set FileSystem URI conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName()); // Maven stuff? conf.set("fs.file.impl", LocalFileSystem.class.getName()); // Maven stuff? 
/* hack for running locally with fake DNS records set this to true if overriding the host name in /etc/hosts*/ - conf.set("dfs.client.use.datanode.hostname", configuration.valueOf("dfs.client.use.datanode.hostname")); + conf.set("dfs.client.use.datanode.hostname", configuration.dfsClientUseDatanodeHostname()); /* server principal the kerberos principle that the namenode is using*/ - conf - .set( - "dfs.namenode.kerberos.principal.pattern", - configuration.valueOf("dfs.namenode.kerberos.principal.pattern") - ); + conf.set("dfs.namenode.kerberos.principal.pattern", configuration.dfsNamenodeKerberosPrincipalPattern()); // set sasl - conf.set("dfs.data.transfer.protection", configuration.valueOf("dfs.data.transfer.protection")); - conf - .set( - "dfs.encrypt.data.transfer.cipher.suites", - configuration.valueOf("dfs.encrypt.data.transfer.cipher.suites") - ); + conf.set("dfs.data.transfer.protection", configuration.dfsDataTransferProtection()); + conf.set("dfs.encrypt.data.transfer.cipher.suites", configuration.dfsEncryptDataTransferCipherSuites()); if (initializeUGI) { UserGroupInformation.setConfiguration(conf); UserGroupInformation - .loginUserFromKeytab(configuration.valueOf("KerberosKeytabUser"), configuration.valueOf("KerberosKeytabPath")); + .loginUserFromKeytab(configuration.KerberosKeytabUser(), configuration.KerberosKeytabPath()); } // filesystem for HDFS access is set here fs = FileSystem.get(conf); } else { // Initializing the FileSystem with minicluster. 
- String hdfsuri = configuration.valueOf("hdfsuri"); + String hdfsuri = configuration.hdfsUri(); // ====== Init HDFS File System Object conf.clear(); // Set FileSystem URI diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java index 1e7a97db..a58c8c83 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -61,9 +61,9 @@ public final class HDFSPrune { private final Path newDirectoryPath; private final long cutOffEpoch; - public HDFSPrune(ConfigurationImpl config, String topicName, FileSystem fs) throws IOException { + public HDFSPrune(NewHdfsConfiguration config, String topicName, FileSystem fs) throws IOException { this.fs = fs; - String path = config.valueOf("hdfsPath").concat("/").concat(topicName); + String path = config.hdfsPath().concat("/").concat(topicName); //==== Create directory if not exists Path workingDir = fs.getWorkingDirectory(); newDirectoryPath = new Path(path); @@ -72,7 +72,7 @@ public HDFSPrune(ConfigurationImpl config, String topicName, FileSystem fs) thro fs.mkdirs(newDirectoryPath); LOGGER.info("Path <{}> created.", path); } - long pruneOffset = Long.parseLong(config.valueOf("pruneOffset")); + long pruneOffset = config.pruneOffset(); cutOffEpoch = System.currentTimeMillis() - pruneOffset; // pruneOffset is parametrized in Config.java. Default value is 2 days in milliseconds. 
} diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java index 43696b8b..e289a6cc 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import org.apache.hadoop.fs.*; import org.apache.kafka.common.TopicPartition; import org.slf4j.Logger; @@ -64,9 +64,9 @@ The offset map can then be used for kafka consumer seek() method, which will add private final FileSystem fs; private final String path; - public HDFSRead(ConfigurationImpl config, FileSystem fs) throws IOException { + public HDFSRead(NewHdfsConfiguration config, FileSystem fs) throws IOException { this.fs = fs; - path = config.valueOf("hdfsPath"); + path = config.hdfsPath(); } public Map hdfsStartOffsets() throws IOException { diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java index ee8aa14b..105e7ed5 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import org.apache.hadoop.fs.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,11 +58,11 @@ public final class HDFSWrite implements AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(HDFSWrite.class); private final String fileName; private final String path; - private final ConfigurationImpl configuration; + private final NewHdfsConfiguration configuration; - public 
HDFSWrite(ConfigurationImpl config, String topic, String partition, long offset) { + public HDFSWrite(NewHdfsConfiguration config, String topic, String partition, long offset) { this.configuration = config; - path = config.valueOf("hdfsPath") + "/" + topic; + path = config.hdfsPath() + "/" + topic; fileName = partition + "." + offset; // filename should be constructed from partition and offset. } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index dfc61d57..810d582c 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -45,7 +45,9 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; import com.teragrep.cfe_39.metrics.*; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import org.apache.hadoop.fs.FileSystem; @@ -69,7 +71,9 @@ public final class HdfsDataIngestion { private static final Logger LOGGER = LoggerFactory.getLogger(HdfsDataIngestion.class); - private final ConfigurationImpl config; + private final NewCommonConfiguration config; + private final NewHdfsConfiguration hdfsConfig; + private final NewKafkaConfiguration kafkaConfig; private final org.apache.kafka.clients.consumer.Consumer kafkaConsumer; private final List threads = new ArrayList<>(); private final Set activeTopics = new HashSet<>(); @@ -77,26 +81,31 @@ public final class HdfsDataIngestion { private final int numOfConsumers; private final Map hdfsStartOffsets; - public HdfsDataIngestion(ConfigurationImpl config) throws IOException { + public HdfsDataIngestion( + NewCommonConfiguration config, + NewHdfsConfiguration 
hdfsConfiguration, + NewKafkaConfiguration kafkaConfiguration + ) throws IOException { this.config = config; - this.numOfConsumers = Integer.parseInt(config.valueOf("numOfConsumers")); - this.useMockKafkaConsumer = Boolean.parseBoolean(config.valueOf("useMockKafkaConsumer")); + this.hdfsConfig = hdfsConfiguration; + this.kafkaConfig = kafkaConfiguration; + this.numOfConsumers = config.numOfConsumers(); + this.useMockKafkaConsumer = kafkaConfiguration.useMockKafkaConsumer(); if (useMockKafkaConsumer) { this.kafkaConsumer = new MockKafkaConsumerFactory(0).getConsumer(); // A consumer used only for scanning the available topics to be allocated to consumers running in different threads (thus 0 as input parameter). } else { Properties kafkaProperties = new Properties(); - kafkaProperties.put("bootstrap.servers", config.valueOf("bootstrap.servers")); - kafkaProperties.put("auto.offset.reset", config.valueOf("auto.offset.reset")); - kafkaProperties.put("enable.auto.commit", config.valueOf("enable.auto.commit")); - kafkaProperties.put("group.id", config.valueOf("group.id")); - kafkaProperties.put("security.protocol", config.valueOf("security.protocol")); - kafkaProperties.put("sasl.mechanism", config.valueOf("sasl.mechanism")); - kafkaProperties.put("max.poll.records", config.valueOf("max.poll.records")); - kafkaProperties.put("fetch.max.bytes", config.valueOf("fetch.max.bytes")); - kafkaProperties.put("request.timeout.ms", config.valueOf("request.timeout.ms")); - kafkaProperties.put("max.poll.interval.ms", config.valueOf("max.poll.interval.ms")); - kafkaProperties.put("useMockKafkaConsumer", config.valueOf("useMockKafkaConsumer")); + kafkaProperties.put("bootstrap.servers", kafkaConfiguration.bootstrapServers()); + kafkaProperties.put("auto.offset.reset", kafkaConfiguration.autoOffsetReset()); + kafkaProperties.put("enable.auto.commit", kafkaConfiguration.enableAutoCommit()); + kafkaProperties.put("group.id", kafkaConfiguration.groupId()); + 
kafkaProperties.put("security.protocol", kafkaConfiguration.securityProtocol()); + kafkaProperties.put("sasl.mechanism", kafkaConfiguration.saslMechanism()); + kafkaProperties.put("max.poll.records", kafkaConfiguration.maxPollRecords()); + kafkaProperties.put("fetch.max.bytes", kafkaConfiguration.fetchMaxBytes()); + kafkaProperties.put("request.timeout.ms", kafkaConfiguration.requestTimeoutMs()); + kafkaProperties.put("max.poll.interval.ms", kafkaConfiguration.maxPollIntervalMs()); this.kafkaConsumer = new KafkaConsumer<>( kafkaProperties, new ByteArrayDeserializer(), @@ -116,11 +125,11 @@ public void run() throws InterruptedException, IOException { List topicCounters = new CopyOnWriteArrayList<>(); // Initialize FileSystem - FileSystemFactoryImpl fileSystemFactoryImpl = new FileSystemFactoryImpl(config); + FileSystemFactoryImpl fileSystemFactoryImpl = new FileSystemFactoryImpl(hdfsConfig); FileSystem fs = fileSystemFactoryImpl.create(true); // Generates offsets of the already committed records for Kafka and passes them to the kafka consumers. 
- try (HDFSRead hr = new HDFSRead(config, fs)) { + try (HDFSRead hr = new HDFSRead(hdfsConfig, fs)) { hdfsStartOffsets.clear(); hdfsStartOffsets.putAll(hr.hdfsStartOffsets()); LOGGER.debug("topicPartitionStartMap generated succesfully: <{}>", hdfsStartOffsets); @@ -131,7 +140,7 @@ public void run() throws InterruptedException, IOException { boolean keepRunning = true; while (keepRunning) { - if ("kerberos".equals(config.valueOf("hadoop.security.authentication"))) { + if ("kerberos".equals(hdfsConfig.hadoopSecurityAuthentication())) { UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab(); } LOGGER.debug("Scanning for threads"); @@ -145,7 +154,7 @@ public void run() throws InterruptedException, IOException { LOGGER.info("topic that is being pruned: <{}>", topic_name); if (topic_name != null) { try { - HDFSPrune hdfsPrune = new HDFSPrune(config, topic_name, fs); + HDFSPrune hdfsPrune = new HDFSPrune(hdfsConfig, topic_name, fs); hdfsPrune.prune(); } catch (IOException e) { @@ -180,11 +189,19 @@ private void createReader( for (int threadId = 1; numOfConsumers >= threadId; threadId++) { BatchDistributionImpl output = new BatchDistributionImpl( config, // Configuration settings + hdfsConfig, topic, // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics topicCounter // TopicCounter object from metrics ); - ReadCoordinator readCoordinator = new ReadCoordinator(topic, config, output, hdfsStartOffsets); + ReadCoordinator readCoordinator = new ReadCoordinator( + topic, + config, + kafkaConfig, + hdfsConfig, + output, + hdfsStartOffsets + ); Thread readThread = new Thread(null, readCoordinator, topic + threadId); // Starts the thread with readCoordinator that creates the consumer and subscribes to the topic. threads.add(readThread); readThread.start(); // Starts the thread, in other words proceeds to call run() function of ReadCoordinator. 
@@ -194,7 +211,7 @@ private void createReader( private void topicScan(DurationStatistics durationStatistics, List topicCounters) { Map> listTopics = kafkaConsumer.listTopics(Duration.ofSeconds(60)); - Pattern topicsRegex = Pattern.compile(config.valueOf("queueTopicPattern")); + Pattern topicsRegex = Pattern.compile(config.queueTopicPattern()); // Find the topics available in Kafka based on given QueueTopicPattern, both active and in-active. Set foundTopics = new HashSet<>(); Map> foundPartitions = new HashMap<>(); @@ -206,7 +223,7 @@ private void topicScan(DurationStatistics durationStatistics, List } } if (foundTopics.isEmpty()) { - throw new IllegalStateException("Pattern <[" + config.valueOf("queueTopicPattern") + "]> found no topics."); + throw new IllegalStateException("Pattern <[" + config.queueTopicPattern() + "]> found no topics."); } // subtract currently active topics from found topics foundTopics.removeAll(activeTopics); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java index d3c1c444..ab0f0477 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; import org.apache.kafka.clients.consumer.*; import org.slf4j.Logger; @@ -59,7 +59,7 @@ public final class KafkaReader implements AutoCloseable { private final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class); - private final ConfigurationImpl config; + private final NewCommonConfiguration config; private final Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; private final ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl; @@ -69,7 +69,7 @@ public KafkaReader( Consumer 
kafkaConsumer, BatchDistributionImpl callbackFunction, ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl, - ConfigurationImpl config + NewCommonConfiguration config ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; @@ -110,7 +110,7 @@ KafkaRecord and other required data for HDFS storage are added to the input para // If no new kafka record batches is received for a while, use callbackFunction.accept() with empty recordOffsetObjectList to flush records that have already been committed in kafka to HDFS. long thisTime = Instant.now().toEpochMilli(); long ftook = thisTime - lastTimeCalled; - if (ftook > Long.parseLong(config.valueOf("consumerTimeout"))) { + if (ftook > config.consumerTimeout()) { callbackFunction.accept(recordOffsetObjectList); lastTimeCalled = Instant.now().toEpochMilli(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index e5c05d88..5ea2adae 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -47,7 +47,8 @@ import com.google.gson.JsonObject; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.queue.UniqueFileCreated; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,18 +63,21 @@ public final class PartitionFileImpl implements PartitionFile { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionFileImpl.class); private final JsonObject topicPartition; - private final ConfigurationImpl config; + private final NewCommonConfiguration config; + private final NewHdfsConfiguration hdfsConfig; private final File syslogFile; private 
final List batchOffsets; private final PartitionRecordsImpl partitionRecords; - PartitionFileImpl(ConfigurationImpl config, JsonObject topicPartition) throws IOException { + PartitionFileImpl(NewCommonConfiguration config, NewHdfsConfiguration hdfsConfig, JsonObject topicPartition) + throws IOException { UniqueFileCreated uniqueFileCreated = new UniqueFileCreated( - config.valueOf("queueDirectory"), + config.queueDirectory(), topicPartition.get("topic").getAsString() + topicPartition.get("partition").getAsString() ); this.syslogFile = uniqueFileCreated.getNextWritableFile(); this.config = config; + this.hdfsConfig = hdfsConfig; this.topicPartition = topicPartition; this.batchOffsets = new ArrayList<>(); this.partitionRecords = new PartitionRecordsImpl(config); @@ -103,7 +107,7 @@ public void commitRecords() throws IOException { storedOffset = next.getOffset(); } // When the file size has gone above the maximum, commit the file into HDFS using the latest topic/partition/offset values as the filename and then delete the local avro-file. - if (Long.parseLong(config.valueOf("maximumFileSize")) < syslogFile.length()) { + if (config.maximumFileSize() < syslogFile.length()) { writeToHdfs(storedOffset); } } @@ -153,7 +157,7 @@ public void delete() { // Writes the file to hdfs and initializes new file. private void writeToHdfs(long offset) throws IOException { try ( - HDFSWrite writer = new HDFSWrite(config, topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), offset) + HDFSWrite writer = new HDFSWrite(hdfsConfig, topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), offset) ) { writer.commit(syslogFile); // commits the final AVRO-file to HDFS. 
} diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java index dd55745a..5bea5b60 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39.consumers.kafka; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; import com.teragrep.rlo_06.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,9 +59,9 @@ public final class PartitionRecordsImpl implements PartitionRecords { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionRecordsImpl.class); private final List kafkaRecordList; - private final ConfigurationImpl config; + private final NewCommonConfiguration config; - public PartitionRecordsImpl(ConfigurationImpl config) { + public PartitionRecordsImpl(NewCommonConfiguration config) { this.kafkaRecordList = new ArrayList<>(); this.config = config; } @@ -79,7 +79,7 @@ public List toSyslogRecordList() { syslogRecordList.add(next.toSyslogRecord()); } catch (ParseException e) { - if (config.valueOf("skipNonRFC5424Records").equalsIgnoreCase("true")) { + if (config.skipNonRFC5424Records()) { LOGGER .warn( "Skipping parsing a non RFC5424 record, record metadata: <{}>. Exception information: ", @@ -92,7 +92,7 @@ public List toSyslogRecordList() { } } catch (NullPointerException e) { - if (config.valueOf("skipEmptyRFC5424Records").equalsIgnoreCase("true")) { + if (config.skipEmptyRFC5424Records()) { LOGGER .warn( "Skipping parsing an empty RFC5424 record, record metadata: <{}>. 
Exception information: ", diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java index fb4ca254..c241af87 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java @@ -45,7 +45,9 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.serialization.ByteArrayDeserializer; @@ -59,13 +61,17 @@ public final class ReadCoordinator implements Runnable { private static final Logger LOGGER = LoggerFactory.getLogger(ReadCoordinator.class); private final String queueTopic; - private final ConfigurationImpl config; + private final NewCommonConfiguration config; + private final NewHdfsConfiguration hdfsConfig; + private final NewKafkaConfiguration kafkaConfig; private final BatchDistributionImpl callbackFunction; private final Map hdfsStartOffsets; public ReadCoordinator( String queueTopic, - ConfigurationImpl config, + NewCommonConfiguration config, + NewKafkaConfiguration kafkaConfig, + NewHdfsConfiguration hdfsConfig, BatchDistributionImpl callbackFunction, Map hdfsStartOffsets ) { @@ -73,6 +79,8 @@ public ReadCoordinator( this.config = config; this.callbackFunction = callbackFunction; this.hdfsStartOffsets = hdfsStartOffsets; + this.kafkaConfig = kafkaConfig; + this.hdfsConfig = hdfsConfig; } private KafkaReader createKafkaReader( @@ -91,7 +99,7 @@ private KafkaReader createKafkaReader( consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( kafkaConsumer, callbackFunctionInput, - 
config + hdfsConfig ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } @@ -100,7 +108,7 @@ else if (Objects.equals(name, "testConsumerTopic2")) { consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( kafkaConsumer, callbackFunctionInput, - config + hdfsConfig ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } @@ -109,7 +117,7 @@ else if (Objects.equals(name, "testConsumerTopic2")) { consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( kafkaConsumer, callbackFunctionInput, - config + hdfsConfig ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } @@ -123,7 +131,7 @@ else if (Objects.equals(name, "testConsumerTopic2")) { consumerRebalanceListenerImpl = new ConsumerRebalanceListenerImpl( kafkaConsumer, callbackFunctionInput, - config + hdfsConfig ); kafkaConsumer.subscribe(Collections.singletonList(topic), consumerRebalanceListenerImpl); } @@ -145,19 +153,18 @@ else if (Objects.equals(name, "testConsumerTopic2")) { // Part or Runnable implementation, called when the thread is started. 
@Override public void run() { - boolean useMockKafkaConsumer = Boolean.parseBoolean(config.valueOf("useMockKafkaConsumer")); + boolean useMockKafkaConsumer = kafkaConfig.useMockKafkaConsumer(); Properties kafkaProperties = new Properties(); - kafkaProperties.put("bootstrap.servers", config.valueOf("bootstrap.servers")); - kafkaProperties.put("auto.offset.reset", config.valueOf("auto.offset.reset")); - kafkaProperties.put("enable.auto.commit", config.valueOf("enable.auto.commit")); - kafkaProperties.put("group.id", config.valueOf("group.id")); - kafkaProperties.put("security.protocol", config.valueOf("security.protocol")); - kafkaProperties.put("sasl.mechanism", config.valueOf("sasl.mechanism")); - kafkaProperties.put("max.poll.records", config.valueOf("max.poll.records")); - kafkaProperties.put("fetch.max.bytes", config.valueOf("fetch.max.bytes")); - kafkaProperties.put("request.timeout.ms", config.valueOf("request.timeout.ms")); - kafkaProperties.put("max.poll.interval.ms", config.valueOf("max.poll.interval.ms")); - kafkaProperties.put("useMockKafkaConsumer", config.valueOf("useMockKafkaConsumer")); + kafkaProperties.put("bootstrap.servers", kafkaConfig.bootstrapServers()); + kafkaProperties.put("auto.offset.reset", kafkaConfig.autoOffsetReset()); + kafkaProperties.put("enable.auto.commit", kafkaConfig.enableAutoCommit()); + kafkaProperties.put("group.id", kafkaConfig.groupId()); + kafkaProperties.put("security.protocol", kafkaConfig.securityProtocol()); + kafkaProperties.put("sasl.mechanism", kafkaConfig.saslMechanism()); + kafkaProperties.put("max.poll.records", kafkaConfig.maxPollRecords()); + kafkaProperties.put("fetch.max.bytes", kafkaConfig.fetchMaxBytes()); + kafkaProperties.put("request.timeout.ms", kafkaConfig.requestTimeoutMs()); + kafkaProperties.put("max.poll.interval.ms", kafkaConfig.maxPollIntervalMs()); try ( KafkaReader kafkaReader = createKafkaReader( kafkaProperties, queueTopic, callbackFunction, useMockKafkaConsumer diff --git 
a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index be62b0b3..48736d22 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -46,7 +46,8 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; @@ -69,7 +70,9 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.CopyOnWriteArrayList; import java.util.function.Consumer; @@ -82,25 +85,48 @@ public class BatchDistributionTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewCommonConfiguration config; + private static NewHdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "3000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "true"); + map.put("skipEmptyRFC5424Records", "true"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "600000"); + config = new NewCommonConfiguration(map); + // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "kerberos"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + 
hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); } @@ -127,6 +153,7 @@ public void normalRecordsTest() { BatchDistributionImpl output = new BatchDistributionImpl( config, // Configuration settings + hdfsConfig, "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics new TopicCounter("topicName") // TopicCounter object from metrics @@ -318,7 +345,7 @@ record = new ConsumerRecord<>( // Assert that records 11-13 are present in local avro-file. - File queueDirectory = new File(config.valueOf("queueDirectory")); + File queueDirectory = new File(config.queueDirectory()); File[] files = queueDirectory.listFiles(); Assertions.assertEquals(1, files.length); @@ -336,9 +363,9 @@ record = new ConsumerRecord<>( // Assert that records 0-10 are present in HDFS - Assertions.assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.10"))); - Path hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.10"); + Assertions.assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.10"))); + Path hdfsreadpath = new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.10"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. 
@@ -359,9 +386,9 @@ record = new ConsumerRecord<>( List kafkaRecordListEmpty = new ArrayList<>(); output.accept(kafkaRecordListEmpty); - Assertions.assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.13"))); - hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.13"); + Assertions.assertEquals(2, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.13"))); + hdfsreadpath = new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.13"); //Init input stream FSDataInputStream inputStream2 = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. @@ -393,6 +420,7 @@ public void skipNonRFC5424DatabaseOutputTest() { Consumer> output = new BatchDistributionImpl( config, // Configuration settings + hdfsConfig, "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics new TopicCounter("topicName") // TopicCounter object from metrics @@ -449,13 +477,13 @@ public void skipNonRFC5424DatabaseOutputTest() { kafkaRecordList.add(kafkaRecord3); output.accept(kafkaRecordList); output.accept(new ArrayList<>()); - Assertions.assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.3"))); + Assertions.assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.3"))); // File in hdfs does not contain any empty records. // Assert that the file in hdfs contains the expected one record. 
- Path hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.3"); + Path hdfsreadpath = new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.3"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. @@ -491,6 +519,7 @@ public void skipNullRFC5424DatabaseOutputTest() { Consumer> output = new BatchDistributionImpl( config, // Configuration settings + hdfsConfig, "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics new TopicCounter("topicName") // TopicCounter object from metrics @@ -530,13 +559,13 @@ public void skipNullRFC5424DatabaseOutputTest() { kafkaRecordList.add(kafkaRecord3); output.accept(kafkaRecordList); output.accept(new ArrayList<>()); - Assertions.assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.2"))); + Assertions.assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.2"))); // File in hdfs does not contain any records, but acts as a marker for kafka consumer offsets. // Assert that the file in hdfs contains the expected zero record. - Path hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.2"); + Path hdfsreadpath = new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.2"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. 
@@ -572,6 +601,7 @@ public void skipNullAndNonRFC5424DatabaseOutputTest() { Consumer> output = new BatchDistributionImpl( config, // Configuration settings + hdfsConfig, "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics new TopicCounter("topicName") // TopicCounter object from metrics @@ -616,12 +646,12 @@ record = new ConsumerRecord<>( kafkaRecordList.add(kafkaRecord); output.accept(kafkaRecordList); output.accept(new ArrayList<>()); - Assertions.assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "topicName")).length); - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.3"))); + Assertions.assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "topicName")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.3"))); // Assert that the file in hdfs contains the expected single record. - Path hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.3"); + Path hdfsreadpath = new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.3"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. 
diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index 0cf02dff..ba038061 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -47,7 +47,8 @@ import com.google.gson.JsonObject; import com.google.gson.JsonParser; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSWrite; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -60,6 +61,8 @@ import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; @@ -70,25 +73,47 @@ public class HdfsTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewCommonConfiguration config; + private static NewHdfsConfiguration hdfsConfig; private FileSystem fs; // Start minicluster and initialize config. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "3000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "true"); + map.put("skipEmptyRFC5424Records", "true"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "600000"); + config = new NewCommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "kerberos"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + 
hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); } @@ -106,11 +131,11 @@ public void teardownMiniCluster() { public void hdfsWriteTest() { // This test case is for testing the functionality of the HDFSWrite.java by writing pre-generated AVRO-files to the HDFS database and asserting the results are correct. assertDoesNotThrow(() -> { - Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); + Assertions.assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); String pathname = System.getProperty("user.dir") + "/src/test/resources/mockHdfsFiles/0.9"; java.nio.file.Path sourceFile = Paths.get(pathname); - java.nio.file.Path targetDir = Paths.get(config.valueOf("queueDirectory")); + java.nio.file.Path targetDir = Paths.get(config.queueDirectory()); java.nio.file.Path targetFile = targetDir.resolve(sourceFile.getFileName()); Assertions.assertFalse(targetFile.toFile().exists()); Files.copy(sourceFile, targetFile); @@ -119,37 +144,35 @@ public void hdfsWriteTest() { JsonObject recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":9}") .getAsJsonObject(); - try (HDFSWrite writer = new HDFSWrite(config, "testConsumerTopic", "0", 9)) { + try (HDFSWrite writer = new HDFSWrite(hdfsConfig, "testConsumerTopic", "0", 9)) { writer.commit(avroFile); // commits avroFile to HDFS. 
} targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions - .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); pathname = System.getProperty("user.dir") + "/src/test/resources/mockHdfsFiles/0.13"; sourceFile = Paths.get(pathname); - targetDir = Paths.get(config.valueOf("queueDirectory")); + targetDir = Paths.get(config.queueDirectory()); targetFile = targetDir.resolve(sourceFile.getFileName()); Files.copy(sourceFile, targetFile); Assertions.assertTrue(targetFile.toFile().exists()); - avroFile = new File(config.valueOf("queueDirectory") + "/0.13"); + avroFile = new File(config.queueDirectory() + "/0.13"); recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":13}") .getAsJsonObject(); - try (HDFSWrite writer = new HDFSWrite(config, "testConsumerTopic", "0", 13)) { + try (HDFSWrite writer = new HDFSWrite(hdfsConfig, "testConsumerTopic", "0", 13)) { writer.commit(avroFile); // commits avroFile to HDFS and deletes avroFile afterward. 
} targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions - .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(2, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); }); } @@ -157,11 +180,11 @@ public void hdfsWriteTest() { public void hdfsWriteExceptionTest() { // This test case is for testing the functionality of the HDFSWrite.java exception handling by trying to write the same file twice and asserting that the proper exception is thrown. 
assertDoesNotThrow(() -> { - Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); + Assertions.assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); String pathname = System.getProperty("user.dir") + "/src/test/resources/mockHdfsFiles/0.9"; java.nio.file.Path sourceFile = Paths.get(pathname); - java.nio.file.Path targetDir = Paths.get(config.valueOf("queueDirectory")); + java.nio.file.Path targetDir = Paths.get(config.queueDirectory()); java.nio.file.Path targetFile = targetDir.resolve(sourceFile.getFileName()); Assertions.assertFalse(targetFile.toFile().exists()); Files.copy(sourceFile, targetFile); @@ -170,23 +193,22 @@ public void hdfsWriteExceptionTest() { JsonObject recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":9}") .getAsJsonObject(); - try (HDFSWrite writer = new HDFSWrite(config, "testConsumerTopic", "0", 9)) { + try (HDFSWrite writer = new HDFSWrite(hdfsConfig, "testConsumerTopic", "0", 9)) { writer.commit(avroFile); // commits avroFile to HDFS. 
} targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions - .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Files.copy(sourceFile, targetFile); Assertions.assertTrue(targetFile.toFile().exists()); - avroFile = new File(config.valueOf("queueDirectory") + "/0.9"); + avroFile = new File(config.queueDirectory() + "/0.9"); recordOffsetJo = JsonParser .parseString("{\"topic\":\"testConsumerTopic\", \"partition\":0, \"offset\":9}") .getAsJsonObject(); - HDFSWrite writer = new HDFSWrite(config, "testConsumerTopic", "0", 9); + HDFSWrite writer = new HDFSWrite(hdfsConfig, "testConsumerTopic", "0", 9); File finalAvroFile = avroFile; Exception e = Assertions.assertThrows(Exception.class, () -> writer.commit(finalAvroFile)); Assertions.assertEquals("File 0.9 already exists", e.getMessage()); @@ -194,9 +216,8 @@ public void hdfsWriteExceptionTest() { targetFile.toFile().delete(); // writer no longer handles deletion of the files Assertions.assertFalse(targetFile.toFile().exists()); Assertions - .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); }); } } diff --git 
a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java index 4e948cda..8b94eebb 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java @@ -46,7 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -72,27 +74,64 @@ public class Ingestion0FilesLowSizeTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion0FilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewCommonConfiguration config; + private static NewHdfsConfiguration hdfsConfig; + private static NewKafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "3000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "true"); + map.put("skipEmptyRFC5424Records", "true"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "600000"); + config = new NewCommonConfiguration(map); + // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("maximumFileSize", "3000"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + 
hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); + + Map kafkaMap = new HashMap<>(); + kafkaMap.put("java.security.auth.login.config", "/opt/teragrep/cfe_39/etc/config.jaas"); + kafkaMap.put("bootstrap.servers", "test"); + kafkaMap.put("auto.offset.reset", "earliest"); + kafkaMap.put("enable.auto.commit", "false"); + kafkaMap.put("group.id", "cfe_39"); + kafkaMap.put("security.protocol", "SASL_PLAINTEXT"); + kafkaMap.put("sasl.mechanism", "PLAIN"); + kafkaMap.put("max.poll.records", "500"); + kafkaMap.put("fetch.max.bytes", "1073741820"); + kafkaMap.put("request.timeout.ms", "300000"); + kafkaMap.put("max.poll.interval.ms", "300000"); + kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaConfig = new NewKafkaConfiguration(kafkaMap); }); } @@ -116,18 +155,18 @@ public void ingestion0FilesLowSizeTest() { Maximum file size is set to 3,000 in the config. Empty HDFS database, 140 records in mock kafka consumer ready for ingestion. All 14 records for each 10 topic partitions are stored in two avro-files per partition based on MaximumFileSize.*/ assertDoesNotThrow(() -> { - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. 
- Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); - HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct. + Assertions.assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); + HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config, hdfsConfig, kafkaConfig); hdfsDataIngestion.run(); }); // Assert that the kafka records were ingested correctly and the database holds the correct 140 records. // Check that the files were properly written to HDFS. - String hdfsuri = config.valueOf("hdfsuri"); + String hdfsuri = hdfsConfig.hdfsUri(); - String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // ====== Init HDFS File System Object Configuration conf = new Configuration(); // Set FileSystem URI @@ -147,30 +186,30 @@ public void ingestion0FilesLowSizeTest() { Assertions.assertTrue(fs.exists(newDirectoryPath)); // Assert that the kafka records were ingested correctly and the database holds the expected 20 files. 
- FileStatus[] fileStatuses = fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")); + FileStatus[] fileStatuses = fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")); Assertions - .assertEquals(10, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(10, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.10"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.10"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "1.10"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "1.10"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "2.10"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "2.10"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "3.10"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "3.10"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "4.10"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "4.10"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "5.10"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "5.10"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "6.10"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "6.10"))); Assertions - 
.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "7.10"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "7.10"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "8.10"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "8.10"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "9.10"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "9.10"))); LOGGER.debug("All expected files present in HDFS."); // Now Assert the files that were too small to be stored in HDFS. @@ -182,7 +221,7 @@ public void ingestion0FilesLowSizeTest() { for (String fileName : filenameList) { - String path2 = config.valueOf("queueDirectory") + "/" + fileName; + String path2 = config.queueDirectory() + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index 16e8f98d..bc0e4290 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -46,7 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -70,27 +72,64 @@ public class Ingestion0FilesTest { private static final Logger LOGGER = 
LoggerFactory.getLogger(Ingestion0FilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewCommonConfiguration config; + private static NewHdfsConfiguration hdfsConfig; + private static NewKafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "30000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "true"); + map.put("skipEmptyRFC5424Records", "true"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "600000"); + config = new NewCommonConfiguration(map); + // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("maximumFileSize", "30000"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new 
TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); + + Map kafkaMap = new HashMap<>(); + kafkaMap.put("java.security.auth.login.config", "/opt/teragrep/cfe_39/etc/config.jaas"); + kafkaMap.put("bootstrap.servers", "test"); + kafkaMap.put("auto.offset.reset", "earliest"); + kafkaMap.put("enable.auto.commit", "false"); + kafkaMap.put("group.id", "cfe_39"); + kafkaMap.put("security.protocol", "SASL_PLAINTEXT"); + kafkaMap.put("sasl.mechanism", "PLAIN"); + kafkaMap.put("max.poll.records", "500"); + kafkaMap.put("fetch.max.bytes", "1073741820"); + kafkaMap.put("request.timeout.ms", "300000"); + kafkaMap.put("max.poll.interval.ms", "300000"); + kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaConfig = new NewKafkaConfiguration(kafkaMap); }); } @@ -114,15 +153,15 @@ public void ingestion0FilesTest() { Maximum file size is set to 30,000 in the config. Empty HDFS database, 160 records in mock kafka consumer ready for ingestion. 
All 14 records for each 10 topic partitions are stored in a single avro-file per partition (2 skipped records per file).*/ assertDoesNotThrow(() -> { - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. - Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); - HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct. + Assertions.assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); + HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config, hdfsConfig, kafkaConfig); hdfsDataIngestion.run(); }); // Assert that the kafka records were ingested correctly and the database/temporary file holds the correct 140 records (20 broken records were skipped). assertDoesNotThrow(() -> { - String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; Path newDirectoryPath = new Path(path); Assertions.assertTrue(fs.exists(newDirectoryPath)); @@ -138,7 +177,7 @@ public void ingestion0FilesTest() { // Assert that all the records are inside the temporary AVRO-files generated by PartitionFile objects during consumption. 
- File queueDirectory = new File(config.valueOf("queueDirectory")); + File queueDirectory = new File(config.queueDirectory()); File[] files = queueDirectory.listFiles(); Assertions.assertEquals(10, files.length); for (File file : files) { @@ -148,7 +187,7 @@ public void ingestion0FilesTest() { int partitionCounter = 0; for (String fileName : filenameList) { - String path2 = config.valueOf("queueDirectory") + "/" + fileName; + String path2 = config.queueDirectory() + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index 3a343797..cc7c87ff 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -46,7 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -62,10 +64,7 @@ import java.io.File; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -76,30 +75,67 @@ public class Ingestion1Old1NewFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion1Old1NewFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewCommonConfiguration config; + private static 
NewHdfsConfiguration hdfsConfig; + private static NewKafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "30000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "true"); + map.put("skipEmptyRFC5424Records", "true"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "600000"); + config = new NewCommonConfiguration(map); + // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("maximumFileSize", "30000"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + 
hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); + + Map kafkaMap = new HashMap<>(); + kafkaMap.put("java.security.auth.login.config", "/opt/teragrep/cfe_39/etc/config.jaas"); + kafkaMap.put("bootstrap.servers", "test"); + kafkaMap.put("auto.offset.reset", "earliest"); + kafkaMap.put("enable.auto.commit", "false"); + kafkaMap.put("group.id", "cfe_39"); + kafkaMap.put("security.protocol", "SASL_PLAINTEXT"); + kafkaMap.put("sasl.mechanism", "PLAIN"); + kafkaMap.put("max.poll.records", "500"); + kafkaMap.put("fetch.max.bytes", "1073741820"); + kafkaMap.put("request.timeout.ms", "300000"); + kafkaMap.put("max.poll.interval.ms", "300000"); + kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaConfig = new NewKafkaConfiguration(kafkaMap); // Inserts pre-made avro-files to HDFS where one file has new timestamp and other old, which are normally generated during data ingestion from mock kafka consumer. 
- String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -154,29 +190,27 @@ public void ingestion1Old1NewFileTest() { assertDoesNotThrow(() -> { // Assert the known starting state. - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); - Assertions - .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. + .assertEquals(2, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue((System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset"))) > 157784760000L); - HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct. 
+ Assertions.assertTrue((System.currentTimeMillis() - hdfsConfig.pruneOffset()) > 157784760000L); + HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config, hdfsConfig, kafkaConfig); hdfsDataIngestion.run(); // Assert that the kafka records were ingested and pruned correctly and the database holds only the expected 1 file. Assertions - .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); Assertions - .assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); // Assert the avro-files that were too small to be stored in HDFS. - String path1 = config.valueOf("queueDirectory") + "/" + "testConsumerTopic0.1"; + String path1 = config.queueDirectory() + "/" + "testConsumerTopic0.1"; File avroFile1 = new File(path1); Assertions.assertFalse(avroFile1.exists()); // Partition 0 avro-file shouldn't exist because there are no records left in the buffer. @@ -185,7 +219,7 @@ public void ingestion1Old1NewFileTest() { filenameList.add("testConsumerTopic" + i + "." 
+ 1); } for (String fileName : filenameList) { - String path2 = config.valueOf("queueDirectory") + "/" + fileName; + String path2 = config.queueDirectory() + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index 6b33b842..71f88ff5 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -46,7 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -60,10 +62,7 @@ import java.io.File; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -74,30 +73,67 @@ public class Ingestion2NewFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion2NewFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewCommonConfiguration config; + private static NewHdfsConfiguration hdfsConfig; + private static NewKafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. 
@BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "30000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "true"); + map.put("skipEmptyRFC5424Records", "true"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "600000"); + config = new NewCommonConfiguration(map); + // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("maximumFileSize", "30000"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + 
hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); + + Map kafkaMap = new HashMap<>(); + kafkaMap.put("java.security.auth.login.config", "/opt/teragrep/cfe_39/etc/config.jaas"); + kafkaMap.put("bootstrap.servers", "test"); + kafkaMap.put("auto.offset.reset", "earliest"); + kafkaMap.put("enable.auto.commit", "false"); + kafkaMap.put("group.id", "cfe_39"); + kafkaMap.put("security.protocol", "SASL_PLAINTEXT"); + kafkaMap.put("sasl.mechanism", "PLAIN"); + kafkaMap.put("max.poll.records", "500"); + kafkaMap.put("fetch.max.bytes", "1073741820"); + kafkaMap.put("request.timeout.ms", "300000"); + kafkaMap.put("max.poll.interval.ms", "300000"); + kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaConfig = new NewKafkaConfiguration(kafkaMap); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. - String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. 
Path newDirectoryPath = new Path(path); // Create new Directory @@ -148,27 +184,25 @@ public void ingestion2NewFilesTest() { */ assertDoesNotThrow(() -> { // Assert the known starting state. - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); Assertions - .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(2, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. - HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct. + HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config, hdfsConfig, kafkaConfig); hdfsDataIngestion.run(); // Assert that the kafka records were ingested correctly and the database holds the expected 2 files. 
Assertions - .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(2, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); // Assert the avro-files that were too small to be stored in HDFS. - String path1 = config.valueOf("queueDirectory") + "/" + "testConsumerTopic0.1"; + String path1 = config.queueDirectory() + "/" + "testConsumerTopic0.1"; File avroFile1 = new File(path1); Assertions.assertFalse(avroFile1.exists()); // Partition 0 avro-file shouldn't exist because there are no records left in the buffer. @@ -177,7 +211,7 @@ public void ingestion2NewFilesTest() { filenameList.add("testConsumerTopic" + partition + "." 
+ 1); } for (String fileName : filenameList) { - String path2 = config.valueOf("queueDirectory") + "/" + fileName; + String path2 = config.queueDirectory() + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index 061197f1..b6cca4bf 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -46,7 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -62,10 +64,7 @@ import java.io.File; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -76,30 +75,67 @@ public class Ingestion2OldFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion2OldFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewCommonConfiguration config; + private static NewHdfsConfiguration hdfsConfig; + private static NewKafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. 
@BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "30000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "true"); + map.put("skipEmptyRFC5424Records", "true"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "600000"); + config = new NewCommonConfiguration(map); + // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("maximumFileSize", "30000"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + 
hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); + + Map kafkaMap = new HashMap<>(); + kafkaMap.put("java.security.auth.login.config", "/opt/teragrep/cfe_39/etc/config.jaas"); + kafkaMap.put("bootstrap.servers", "test"); + kafkaMap.put("auto.offset.reset", "earliest"); + kafkaMap.put("enable.auto.commit", "false"); + kafkaMap.put("group.id", "cfe_39"); + kafkaMap.put("security.protocol", "SASL_PLAINTEXT"); + kafkaMap.put("sasl.mechanism", "PLAIN"); + kafkaMap.put("max.poll.records", "500"); + kafkaMap.put("fetch.max.bytes", "1073741820"); + kafkaMap.put("request.timeout.ms", "300000"); + kafkaMap.put("max.poll.interval.ms", "300000"); + kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaConfig = new NewKafkaConfiguration(kafkaMap); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. - String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. 
Path newDirectoryPath = new Path(path); // Create new Directory @@ -154,25 +190,23 @@ public void ingestion2OldFilesTest() { assertDoesNotThrow(() -> { // Assert the known starting state. - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); - Assertions - .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. + .assertEquals(2, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue((System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset"))) > 157784760000L); - HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct. + Assertions.assertTrue((System.currentTimeMillis() - hdfsConfig.pruneOffset()) > 157784760000L); + HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config, hdfsConfig, kafkaConfig); hdfsDataIngestion.run(); // Assert that the kafka records were ingested and pruned correctly and the database doesn't hold any files. 
Assertions - .assertEquals(0, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(0, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); // Assert the avro-files that were too small to be stored in HDFS. - String path1 = config.valueOf("queueDirectory") + "/" + "testConsumerTopic0.1"; + String path1 = config.queueDirectory() + "/" + "testConsumerTopic0.1"; File avroFile1 = new File(path1); Assertions.assertFalse(avroFile1.exists()); // Partition 0 avro-file shouldn't exist because there are no records left in the buffer. @@ -181,7 +215,7 @@ public void ingestion2OldFilesTest() { filenameList.add("testConsumerTopic" + i + "." + 1); } for (String fileName : filenameList) { - String path2 = config.valueOf("queueDirectory") + "/" + fileName; + String path2 = config.queueDirectory() + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); diff --git a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java index 49249be7..c60e0ba1 100644 --- a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java +++ b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java @@ -46,7 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileStream; import org.apache.avro.specific.SpecificDatumReader; @@ -62,6 +64,8 @@ import java.io.File; import java.nio.file.Files; +import java.util.HashMap; 
+import java.util.Map; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; @@ -70,28 +74,64 @@ public class IngestionConsumerTimeoutTest { private static final Logger LOGGER = LoggerFactory.getLogger(IngestionConsumerTimeoutTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewCommonConfiguration config; + private static NewHdfsConfiguration hdfsConfig; + private static NewKafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "3000000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "true"); + map.put("skipEmptyRFC5424Records", "true"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "1000"); // Low consumerTimeout + config = new NewCommonConfiguration(map); + // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - 
config.with("maximumFileSize", "3000000"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - config.with("consumerTimeout", "1000"); // Low consumerTimeout - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); + + Map kafkaMap = new HashMap<>(); + kafkaMap.put("java.security.auth.login.config", "/opt/teragrep/cfe_39/etc/config.jaas"); + kafkaMap.put("bootstrap.servers", "test"); + kafkaMap.put("auto.offset.reset", "earliest"); + kafkaMap.put("enable.auto.commit", "false"); + kafkaMap.put("group.id", "cfe_39"); + kafkaMap.put("security.protocol", "SASL_PLAINTEXT"); + kafkaMap.put("sasl.mechanism", "PLAIN"); + kafkaMap.put("max.poll.records", "500"); + kafkaMap.put("fetch.max.bytes", "1073741820"); + kafkaMap.put("request.timeout.ms", "300000"); + kafkaMap.put("max.poll.interval.ms", "300000"); + kafkaMap.put("useMockKafkaConsumer", "true"); + 
kafkaConfig = new NewKafkaConfiguration(kafkaMap); }); } @@ -113,46 +153,46 @@ public void teardownMiniCluster() { public void ingestion0FilesTest() { /*This test case is for testing the functionality of the consumerTimeout.*/ assertDoesNotThrow(() -> { - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct. - Assertions.assertEquals(1000, Long.parseLong(config.valueOf("consumerTimeout"))); - Assertions.assertEquals(3000000, Long.parseLong(config.valueOf("maximumFileSize"))); - Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); - HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config); + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct. + Assertions.assertEquals(1000, config.consumerTimeout()); + Assertions.assertEquals(3000000, config.maximumFileSize()); + Assertions.assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); + HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config, hdfsConfig, kafkaConfig); hdfsDataIngestion.run(); }); // Assert that the kafka records were ingested correctly, HDFS should hold all the records even though maximumFileSize is set higher than expected file sizes. 
assertDoesNotThrow(() -> { - String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; Path newDirectoryPath = new Path(path); Assertions.assertTrue(fs.exists(newDirectoryPath)); FileStatus[] fileStatuses = fs.listStatus(newDirectoryPath); Assertions.assertEquals(10, fileStatuses.length); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "1.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "1.13"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "2.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "2.13"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "3.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "3.13"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "4.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "4.13"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "5.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "5.13"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "6.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "6.13"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + 
"testConsumerTopic" + "/" + "7.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "7.13"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "8.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "8.13"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "9.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "9.13"))); // Assert that the expected records are present in hdfs files for (int i = 0; i <= 9; i++) { - Path hdfsreadpath = new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + i + ".13"); + Path hdfsreadpath = new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + i + ".13"); //Init input stream FSDataInputStream inputStream = fs.open(hdfsreadpath); //The data is in AVRO-format, so it can't be read as a string. @@ -169,7 +209,7 @@ public void ingestion0FilesTest() { } // Assert that all the temporary AVRO-files generated by PartitionFile objects during consumption were deleted to prepare for new records. 
- File queueDirectory = new File(config.valueOf("queueDirectory")); + File queueDirectory = new File(config.queueDirectory()); File[] files = queueDirectory.listFiles(); Assertions.assertEquals(0, files.length); }); diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index 7a4d1804..e3e2e143 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -46,7 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.ReadCoordinator; import com.teragrep.cfe_39.metrics.DurationStatistics; @@ -74,27 +76,64 @@ public class KafkaConsumerTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewCommonConfiguration config; + private static NewHdfsConfiguration hdfsConfig; + private static NewKafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration with skipping of broken records disabled. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "30000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "true"); + map.put("skipEmptyRFC5424Records", "true"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "600000"); + config = new NewCommonConfiguration(map); + // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("maximumFileSize", "30000"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + 
hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); + + Map kafkaMap = new HashMap<>(); + kafkaMap.put("java.security.auth.login.config", "/opt/teragrep/cfe_39/etc/config.jaas"); + kafkaMap.put("bootstrap.servers", "test"); + kafkaMap.put("auto.offset.reset", "earliest"); + kafkaMap.put("enable.auto.commit", "false"); + kafkaMap.put("group.id", "cfe_39"); + kafkaMap.put("security.protocol", "SASL_PLAINTEXT"); + kafkaMap.put("sasl.mechanism", "PLAIN"); + kafkaMap.put("max.poll.records", "500"); + kafkaMap.put("fetch.max.bytes", "1073741820"); + kafkaMap.put("request.timeout.ms", "300000"); + kafkaMap.put("max.poll.interval.ms", "300000"); + kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaConfig = new NewKafkaConfiguration(kafkaMap); }); } @@ -117,12 +156,14 @@ public void readCoordinatorTest2Threads() { // BatchDistributionImpl can not be used as a functional interface. 
BatchDistributionImpl output1 = new BatchDistributionImpl( config, // Configuration settings + hdfsConfig, "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics new TopicCounter("topicName") // TopicCounter object from metrics ); BatchDistributionImpl output2 = new BatchDistributionImpl( config, // Configuration settings + hdfsConfig, "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics new TopicCounter("topicName") // TopicCounter object from metrics @@ -131,6 +172,8 @@ public void readCoordinatorTest2Threads() { ReadCoordinator readCoordinator = new ReadCoordinator( "testConsumerTopic", config, + kafkaConfig, + hdfsConfig, output1, hdfsStartOffsets ); @@ -140,6 +183,8 @@ public void readCoordinatorTest2Threads() { ReadCoordinator readCoordinator2 = new ReadCoordinator( "testConsumerTopic", config, + kafkaConfig, + hdfsConfig, output2, hdfsStartOffsets ); @@ -156,7 +201,7 @@ public void readCoordinatorTest2Threads() { filenameList.add("testConsumerTopic" + i + "." + 1); } for (String fileName : filenameList) { - String path2 = config.valueOf("queueDirectory") + "/" + fileName; + String path2 = config.queueDirectory() + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); @@ -184,6 +229,7 @@ public void readCoordinatorTest1Thread() { // BatchDistributionImpl can not be used as a functional interface. 
BatchDistributionImpl output = new BatchDistributionImpl( config, // Configuration settings + hdfsConfig, "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics new TopicCounter("topicName") // TopicCounter object from metrics @@ -192,6 +238,8 @@ public void readCoordinatorTest1Thread() { ReadCoordinator readCoordinator = new ReadCoordinator( "testConsumerTopic", config, + kafkaConfig, + hdfsConfig, output, hdfsStartOffsets ); @@ -208,7 +256,7 @@ public void readCoordinatorTest1Thread() { filenameList.add("testConsumerTopic" + i + "." + 1); } for (String fileName : filenameList) { - String path2 = config.valueOf("queueDirectory") + "/" + fileName; + String path2 = config.queueDirectory() + "/" + fileName; File avroFile = new File(path2); Assertions.assertTrue(filenameList.contains(avroFile.getName())); DatumReader datumReader = new SpecificDatumReader<>(SyslogRecord.class); diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 0f7a51f0..3223ef4d 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -45,7 +45,8 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; @@ -66,7 +67,9 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.CopyOnWriteArrayList; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; @@ -78,26 +81,48 @@ public 
class ProcessingFailureTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewCommonConfiguration config; + private static NewHdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration with skipping of broken records disabled. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/failProcessing.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "3000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "false"); + map.put("skipEmptyRFC5424Records", "false"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "600000"); + config = new NewCommonConfiguration(map); + // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new 
TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); } @@ -124,6 +149,7 @@ public void failNonRFC5424DatabaseOutputTest() { BatchDistributionImpl output = new BatchDistributionImpl( config, // Configuration settings + hdfsConfig, "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics new TopicCounter("topicName") // TopicCounter object from metrics @@ -148,11 +174,11 @@ public void failNonRFC5424DatabaseOutputTest() { recordOffsetObjectList.add(recordOffsetObject); Exception e = Assertions.assertThrows(Exception.class, () -> output.accept(recordOffsetObjectList)); Assertions.assertEquals("com.teragrep.rlo_06.PriorityParseException: PRIORITY < missing", e.getMessage()); - Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.1"))); + Assertions.assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.1"))); // No files stored to hdfs. // Assert the local avro file that should e empty. 
- File queueDirectory = new File(config.valueOf("queueDirectory")); + File queueDirectory = new File(config.queueDirectory()); File[] files = queueDirectory.listFiles(); Assertions.assertEquals(0, files.length); // Partition 0 avro-file shouldn't exist because there are no records left in the buffer. }); @@ -172,6 +198,7 @@ public void failNullRFC5424DatabaseOutputTest() { BatchDistributionImpl output = new BatchDistributionImpl( config, // Configuration settings + hdfsConfig, "topicName", // String, the name of the topic durationStatistics, // RuntimeStatistics object from metrics new TopicCounter("topicName") // TopicCounter object from metrics @@ -195,12 +222,16 @@ public void failNullRFC5424DatabaseOutputTest() { recordOffsetObjectList.add(recordOffsetObject); RuntimeException e = Assertions .assertThrows(RuntimeException.class, () -> output.accept(recordOffsetObjectList)); - Assertions.assertEquals("java.lang.NullPointerException", e.getMessage()); - Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "topicName" + "/" + "0.1"))); + Assertions + .assertEquals( + "java.lang.NullPointerException: Cannot read the array length because \"buf\" is null", + e.getMessage() + ); + Assertions.assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "topicName" + "/" + "0.1"))); // No files stored to hdfs. - // Assert the local avro file that should e empty. - File queueDirectory = new File(config.valueOf("queueDirectory")); + // Assert the local avro file that should be empty. + File queueDirectory = new File(config.queueDirectory()); File[] files = queueDirectory.listFiles(); Assertions.assertEquals(0, files.length); // Partition 0 avro-file shouldn't exist because there are no records left in the buffer. 
}); diff --git a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java index cc07e000..d4e31ce3 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -60,6 +60,8 @@ import java.io.File; import java.nio.file.Files; +import java.util.HashMap; +import java.util.Map; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; @@ -68,26 +70,33 @@ public class PruningNoFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningNoFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewHdfsConfiguration hdfsConfig; private FileSystem fs; // Start minicluster and initialize config. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); } @@ -104,15 +113,15 @@ public void teardownMiniCluster() { public void noFiles() { // This test case is for testing the functionality of the HDFSPrune.java when the target database is empty. 
assertDoesNotThrow(() -> { - Assertions.assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); - HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); + Assertions.assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); + HDFSPrune hdfsPrune = new HDFSPrune(hdfsConfig, "testConsumerTopic", fs); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); Assertions - .assertEquals(0, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(0, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); int deleted = hdfsPrune.prune(); Assertions.assertEquals(0, deleted); Assertions - .assertEquals(0, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(0, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java index a0ad14c4..728351c3 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -60,6 +60,8 @@ import java.io.File; import java.nio.file.Files; +import java.util.HashMap; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -72,29 +74,36 @@ public class PruningOneNewFileTest { private static final Logger LOGGER 
= LoggerFactory.getLogger(PruningOneNewFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewHdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + 
hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); // Inserts a single pre-made avro-file with a new timestamp to HDFS, which is normally generated during data ingestion from mock kafka consumer. - String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -135,24 +144,21 @@ public void teardownMiniCluster() { @Test public void oneNewFileTest() { // This test case is for testing the functionality of the HDFSPrune.java when the database holds a file with a timestamp that shouldn't trigger pruning of old files. - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. - Assertions - .assertTrue(System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset")) > 157784760000L); + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. 
+ Assertions.assertTrue(System.currentTimeMillis() - hdfsConfig.pruneOffset() > 157784760000L); assertDoesNotThrow(() -> { - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); Assertions - .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); - HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); + .assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); + HDFSPrune hdfsPrune = new HDFSPrune(hdfsConfig, "testConsumerTopic", fs); int deleted = hdfsPrune.prune(); Assertions.assertEquals(0, deleted); // Also check with HDFS access if expected files still exist. 
Assertions - .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java index f07923d0..ddc15401 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -60,6 +60,8 @@ import java.io.File; import java.nio.file.Files; +import java.util.HashMap; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -72,29 +74,36 @@ public class PruningOneOldFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningOneOldFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewHdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); // Inserts a single pre-made avro-file with an olf timestamp to HDFS, which is normally generated during data ingestion from mock kafka consumer. 
- String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -135,24 +144,22 @@ public void teardownMiniCluster() { @Test public void oneOldFileTest() { // This test case is for testing the functionality of the HDFSPrune.java when the database holds a file with a timestamp that should trigger pruning of old files. - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. - Assertions - .assertTrue(System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset")) > 157784760000L); + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. 
+ Assertions.assertTrue(System.currentTimeMillis() - hdfsConfig.pruneOffset() > 157784760000L); assertDoesNotThrow(() -> { - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); Assertions - .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); - HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); + .assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); + HDFSPrune hdfsPrune = new HDFSPrune(hdfsConfig, "testConsumerTopic", fs); int deleted = hdfsPrune.prune(); Assertions.assertEquals(1, deleted); // Also check with HDFS access if expected files still exist. 
Assertions - .assertEquals(0, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(0, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); Assertions - .assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java index c4f495a7..c96a5ac1 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -60,6 +60,8 @@ import java.io.File; import java.nio.file.Files; +import java.util.HashMap; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -72,30 +74,38 @@ public class PruningOneOldOneNewFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningOneOldOneNewFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewHdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); + // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); /* Inserts pre-made avro-files to HDFS, which are normally generated during data ingestion from mock kafka consumer. 
One file has new timestamp and another old timestamp.*/ - String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -139,28 +149,26 @@ public void teardownMiniCluster() { public void oneOldOneNewFileTest() { /* This test case is for testing the functionality of the HDFSPrune.java when the database holds a file with a timestamp that shouldn't trigger pruning of old files and another file that should trigger the pruning. The file with newer timestamp is ignored while the older is deleted from the database.*/ - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. - Assertions - .assertTrue(System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset")) > 157784760000L); + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. 
+ Assertions.assertTrue(System.currentTimeMillis() - hdfsConfig.pruneOffset() > 157784760000L); assertDoesNotThrow(() -> { - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); - Assertions - .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(2, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); - HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + HDFSPrune hdfsPrune = new HDFSPrune(hdfsConfig, "testConsumerTopic", fs); int deleted = hdfsPrune.prune(); Assertions.assertEquals(1, deleted); // Also check with HDFS access if expected files still exist. 
Assertions - .assertEquals(1, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(1, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); Assertions - .assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java index f64d0f33..78b1e88d 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -60,6 +60,8 @@ import java.io.File; import java.nio.file.Files; +import java.util.HashMap; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -72,30 +74,37 @@ public class PruningTwoNewFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningTwoNewFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewHdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", "hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. 
- String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -137,25 +146,23 @@ public void teardownMiniCluster() { @Test public void twoNewFilesTest() { // This test case is for testing the functionality of the HDFSPrune.java when the database holds two files with a timestamp that shouldn't trigger pruning of old files. - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. 
assertDoesNotThrow(() -> { Assertions - .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(2, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); - HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + HDFSPrune hdfsPrune = new HDFSPrune(hdfsConfig, "testConsumerTopic", fs); int deleted = hdfsPrune.prune(); Assertions.assertEquals(0, deleted); // Also check with HDFS access if expected files still exist. Assertions - .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertEquals(2, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java index c256790c..b1d247c1 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java @@ -45,7 +45,7 @@ */ package 
com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -60,6 +60,8 @@ import java.io.File; import java.nio.file.Files; +import java.util.HashMap; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -72,30 +74,37 @@ public class PruningTwoOldFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningTwoOldFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static ConfigurationImpl config; + private static NewHdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); - hdfsCluster = new TestMiniClusterFactory().create(config, baseDir); - config.with("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); - fs = new TestFileSystemFactory().create(config.valueOf("hdfsuri")); + hdfsCluster = new TestMiniClusterFactory().create(baseDir); + Map hdfsMap = new HashMap<>(); + hdfsMap.put("pruneOffset", "157784760000"); + hdfsMap.put("hdfsuri", "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/"); + hdfsMap.put("hdfsPath", 
"hdfs:///opt/teragrep/cfe_39/srv/"); + hdfsMap.put("java.security.krb5.kdc", "test"); + hdfsMap.put("java.security.krb5.realm", "test"); + hdfsMap.put("hadoop.security.authentication", "false"); + hdfsMap.put("hadoop.security.authorization", "test"); + hdfsMap.put("dfs.namenode.kerberos.principal.pattern", "test"); + hdfsMap.put("KerberosKeytabUser", "test"); + hdfsMap.put("KerberosKeytabPath", "test"); + hdfsMap.put("dfs.client.use.datanode.hostname", "false"); + hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); + hdfsMap.put("dfs.data.transfer.protection", "test"); + hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsConfig = new NewHdfsConfiguration(hdfsMap); + fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. - String path = config.valueOf("hdfsPath") + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" + String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" // Sets the directory where the data should be stored, if the directory doesn't exist then it's created. Path newDirectoryPath = new Path(path); // Create new Directory @@ -137,28 +146,26 @@ public void teardownMiniCluster() { @Test public void twoOldFilesTest() { // This test case is for testing the functionality of the HDFSPrune.java when the database holds two files with a timestamp that should trigger pruning of old files. - Assertions.assertTrue(Long.parseLong(config.valueOf("pruneOffset")) >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. 
- Assertions - .assertTrue(System.currentTimeMillis() - Long.parseLong(config.valueOf("pruneOffset")) > 157784760000L); + Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct, too low pruning offset can prune the files if the test is lagging. + Assertions.assertTrue(System.currentTimeMillis() - hdfsConfig.pruneOffset() > 157784760000L); assertDoesNotThrow(() -> { - Assertions.assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic"))); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); Assertions - .assertEquals(2, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(2, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); + Assertions.assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); - Assertions - .assertTrue(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); - HDFSPrune hdfsPrune = new HDFSPrune(config, "testConsumerTopic", fs); + .assertTrue(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); + HDFSPrune hdfsPrune = new HDFSPrune(hdfsConfig, "testConsumerTopic", fs); int deleted = hdfsPrune.prune(); Assertions.assertEquals(2, deleted); // Also check with HDFS access if expected files still exist. 
Assertions - .assertEquals(0, fs.listStatus(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic")).length); + .assertEquals(0, fs.listStatus(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic")).length); Assertions - .assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.9"))); + .assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.9"))); Assertions - .assertFalse(fs.exists(new Path(config.valueOf("hdfsPath") + "/" + "testConsumerTopic" + "/" + "0.13"))); + .assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic" + "/" + "0.13"))); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java index 7e4d0ef4..a141c743 100644 --- a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java +++ b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; +import com.teragrep.cfe_39.configuration.NewCommonConfiguration; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.consumers.kafka.SyslogAvroWriter; import org.apache.avro.file.DataFileReader; @@ -60,32 +60,39 @@ import java.io.File; import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; public class SyslogAvroWriterTest { - private static ConfigurationImpl config; + private static NewCommonConfiguration config; // Prepares known state for testing. @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - config = new ConfigurationImpl(); - config - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); - config.with("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - config.with("hadoop.security.authentication", "false"); + Map map = new HashMap<>(); + map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); + map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); + map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); + map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("maximumFileSize", "3000"); + map.put("queueTopicPattern", "^testConsumerTopic-*$"); + map.put("numOfConsumers", "2"); + map.put("skipNonRFC5424Records", "true"); + map.put("skipEmptyRFC5424Records", "true"); + map.put("pruneOffset", "157784760000"); + map.put("consumerTimeout", "600000"); + config = new NewCommonConfiguration(map); }); } // Teardown the minicluster @AfterEach public void teardownMiniCluster() { - File queueDirectory = new File(config.valueOf("queueDirectory")); + File queueDirectory = new File(config.queueDirectory()); File[] files = queueDirectory.listFiles(); if (files[0].getName().equals("topicName0.1")) { files[0].delete(); @@ -97,9 +104,9 @@ public void writeTest() { assertDoesNotThrow(() -> { - File queueDirectory = new File(config.valueOf("queueDirectory")); + File queueDirectory = new File(config.queueDirectory()); - File syslogFile = new File(config.valueOf("queueDirectory") + File.separator + "topicName0.1"); + File syslogFile = new File(config.queueDirectory() + File.separator + "topicName0.1"); ConsumerRecord record0 = new ConsumerRecord<>( "topicName", diff --git a/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java 
b/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java index bf33648d..571452c5 100644 --- a/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java +++ b/src/test/java/com/teragrep/cfe_39/TestMiniClusterFactory.java @@ -45,7 +45,6 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.ConfigurationImpl; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -56,7 +55,7 @@ // Helper class for creating FileSystem objects. public class TestMiniClusterFactory { - public MiniDFSCluster create(ConfigurationImpl config, File baseDir) throws IOException { + public MiniDFSCluster create(File baseDir) throws IOException { MiniDFSCluster hdfsCluster; // Create a HDFS miniCluster Configuration conf = new Configuration(); From 1146a2374a08e6f6697ebea4b355841aae803202 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 15 Nov 2024 14:11:39 +0200 Subject: [PATCH 69/77] Removed old configuration classes and tests. 
--- .../cfe_39/configuration/Configuration.java | 60 ------- .../configuration/ConfigurationImpl.java | 147 ---------------- .../ConfigurationValidation.java | 54 ------ .../ConfigurationValidationImpl.java | 136 --------------- .../configuration/HdfsConfiguration.java | 103 ----------- .../HdfsConfigurationValidation.java | 114 ------------ .../configuration/KafkaConfiguration.java | 103 ----------- .../KafkaConfigurationValidation.java | 112 ------------ .../teragrep/cfe_39/ConfigurationTest.java | 164 ------------------ 9 files changed, 993 deletions(-) delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/Configuration.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationValidation.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java delete mode 100644 src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationValidation.java delete mode 100644 src/test/java/com/teragrep/cfe_39/ConfigurationTest.java diff --git a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java b/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java deleted file mode 100644 index 10f7bdac..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/Configuration.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 
of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. 
- */ -package com.teragrep.cfe_39.configuration; - -import java.io.IOException; - -public interface Configuration { - - public abstract void load(String configurationPath) throws IOException; - - public abstract void with(String key, String value); - - public abstract String valueOf(String key); - - public abstract boolean has(String key); - -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java deleted file mode 100644 index e5fdd6d9..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationImpl.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. 
- * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.cfe_39.configuration; - -import org.apache.logging.log4j.core.config.Configurator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Properties; - -// This class will only hold the common configuration parameters. 
-public final class ConfigurationImpl implements Configuration { - - private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationImpl.class); - - private final Properties properties; - private final ConfigurationValidationImpl configurationValidationImpl; - private final Configuration hdfsConfiguration; - private final Configuration kafkaConfiguration; - - public ConfigurationImpl() { - this(new Properties(), new HdfsConfiguration(), new KafkaConfiguration()); - } - - public ConfigurationImpl( - Properties properties, - HdfsConfiguration hdfsConfiguration, - KafkaConfiguration kafkaConfiguration - ) { - this.properties = properties; - this.hdfsConfiguration = hdfsConfiguration; // Initializes HdfsConfiguration - this.kafkaConfiguration = kafkaConfiguration; // Initializes KafkaConfiguration - this.configurationValidationImpl = new ConfigurationValidationImpl(); - } - - // This method should load the common properties belonging to this configuration object, but it should also ask the other configuration objects to do the same. - @Override - public void load(String configurationPath) throws IOException { - Path configPath = Paths.get(configurationPath); - LOGGER.info("Loading application config <[{}]>", configPath.toAbsolutePath()); - try (InputStream inputStream = Files.newInputStream(configPath)) { - properties.load(inputStream); - LOGGER.debug("Got configuration: <{}>", properties); - configurationValidationImpl.validate(properties); - } - // also load the hdfs and kafka configuration files. - hdfsConfiguration - .load(properties.getProperty("egress.configurationFile", System.getProperty("user.dir") + "/rpm/resources/egress.properties")); - kafkaConfiguration - .load(properties.getProperty("ingress.configurationFile", System.getProperty("user.dir") + "/rpm/resources/ingress.properties")); - configureLogging(); - } - - // Used only during testing to change existing property values, make a fake for this. 
- @Override - public void with(String key, String value) { - if (this.has(key)) { - properties.setProperty(key, value); - configurationValidationImpl.validate(properties); - } - else if (hdfsConfiguration.has(key)) { - hdfsConfiguration.with(key, value); - } - else if (kafkaConfiguration.has(key)) { - kafkaConfiguration.with(key, value); - } - else { - throw new IllegalArgumentException("Key not found: " + key); - } - } - - @Override - public String valueOf(String key) { - if (this.has(key)) { - return properties.getProperty(key); - } - if (kafkaConfiguration.has(key)) { - return kafkaConfiguration.valueOf(key); - } - if (hdfsConfiguration.has(key)) { - return hdfsConfiguration.valueOf(key); - } - throw new IllegalArgumentException("Key not found: " + key); - } - - @Override - public boolean has(String key) { - return properties.containsKey(key); - } - - private void configureLogging() throws IOException { - // Just for loggers to work - Path log4j2Config = Paths - .get(properties.getProperty("log4j2.configurationFile", System.getProperty("user.dir") + "/rpm/resources/log4j2.properties")); - LOGGER.info("Loading log4j2 config from <[{}]>", log4j2Config.toRealPath()); - Configurator.reconfigure(log4j2Config.toUri()); - } - -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java deleted file mode 100644 index 1b113983..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidation.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. 
- */ -package com.teragrep.cfe_39.configuration; - -import java.util.Properties; - -public interface ConfigurationValidation { - - public abstract void validate(Properties properties); - -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java b/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java deleted file mode 100644 index 654306ca..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/ConfigurationValidationImpl.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. 
- * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.cfe_39.configuration; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.*; - -public final class ConfigurationValidationImpl implements ConfigurationValidation { - - private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationValidationImpl.class); - private final Set requiredKeys; - private final Set optionalKeys; - - public ConfigurationValidationImpl() { - this.requiredKeys = new HashSet<>(); - this.optionalKeys = new HashSet<>(); - } - - @Override - public void validate(Properties properties) { - validateKeys(properties); - validateValues(properties); - } - - private void validateKeys(Properties properties) { - if (requiredKeys.isEmpty() && optionalKeys.isEmpty()) { - loadRequiredKeys(); - loadOptionalKeys(); - } - int requiredCount = 0; - for (Map.Entry keyValuePair : properties.entrySet()) { - if (requiredKeys.contains(keyValuePair.getKey().toString())) { - requiredCount++; - } - else if (!optionalKeys.contains(keyValuePair.getKey().toString())) { - throw new IllegalStateException("Unauthorized key " + keyValuePair.getKey().toString()); - } - } - if (requiredCount < requiredKeys.size()) { - for (String key : requiredKeys) { - if (!properties.containsKey(key)) { - throw new IllegalStateException("Missing required key " + key); - } - } - } - } - - private void validateValues(Properties properties) { - // Check the requirements for the specific key-value pairs. 
- if (Long.parseLong(properties.getProperty("pruneOffset")) <= 0) { - throw new IllegalArgumentException( - "pruneOffset must be set to >0, got " + properties.getProperty("pruneOffset") - ); - } - if (Long.parseLong(properties.getProperty("maximumFileSize")) <= 0) { - throw new IllegalArgumentException( - "maximumFileSize must be set to >0, got " + properties.getProperty("maximumFileSize") - ); - } - if (Long.parseLong(properties.getProperty("numOfConsumers")) <= 0) { - throw new IllegalArgumentException( - "numOfConsumers must be set to >0, got " + properties.getProperty("numOfConsumers") - ); - } - if (Long.parseLong(properties.getProperty("consumerTimeout")) <= 0) { - throw new IllegalArgumentException( - "consumerTimeout must be set to >0, got " + properties.getProperty("consumerTimeout") - ); - } - } - - private void loadRequiredKeys() { - // Required keys - requiredKeys.add("pruneOffset"); - requiredKeys.add("queueDirectory"); - requiredKeys.add("maximumFileSize"); - requiredKeys.add("queueTopicPattern"); - requiredKeys.add("numOfConsumers"); - requiredKeys.add("consumerTimeout"); - requiredKeys.add("skipNonRFC5424Records"); - requiredKeys.add("skipEmptyRFC5424Records"); - } - - private void loadOptionalKeys() { - // Optional keys that have default values in place. 
- optionalKeys.add("log4j2.configurationFile"); - optionalKeys.add("ingress.configurationFile"); - optionalKeys.add("egress.configurationFile"); - } - -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java deleted file mode 100644 index ae345f7e..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. 
- * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.cfe_39.configuration; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Properties; - -public class HdfsConfiguration implements Configuration { - - private final Logger LOGGER = LoggerFactory.getLogger(HdfsConfiguration.class); - private final Properties properties; - private final HdfsConfigurationValidation configurationValidation; - - public HdfsConfiguration() { - this.properties = new Properties(); - this.configurationValidation = new HdfsConfigurationValidation(); - } - - @Override - public void load(String configurationPath) throws IOException { - Path configPath = Paths.get(configurationPath); - LOGGER.info("Loading hdfs config <[{}]>", configPath.toAbsolutePath()); - try (InputStream inputStream = Files.newInputStream(configPath)) { - properties.load(inputStream); - LOGGER.debug("Got configuration: <{}>", properties); - configurationValidation.validate(properties); - } - } - - @Override - public void with(String key, String value) { - if (this.has(key)) { - properties.setProperty(key, value); - configurationValidation.validate(properties); - } - else { - throw new IllegalArgumentException("Key not found: " + key); - } - } - - @Override - public String valueOf(String key) { - if (this.has(key)) { - return properties.getProperty(key); - } - throw new IllegalArgumentException("Key not found: " + key); - } - - 
@Override - public boolean has(String key) { - return properties.containsKey(key); - } -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationValidation.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationValidation.java deleted file mode 100644 index 70bf5e97..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfigurationValidation.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. 
- * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.cfe_39.configuration; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.HashSet; -import java.util.Map; -import java.util.Properties; -import java.util.Set; - -public class HdfsConfigurationValidation implements ConfigurationValidation { - - private final Logger LOGGER = LoggerFactory.getLogger(HdfsConfigurationValidation.class); - private final Set requiredKeys; - - public HdfsConfigurationValidation() { - this.requiredKeys = new HashSet<>(); - } - - @Override - public void validate(Properties properties) { - validateKeys(properties); - validateValues(properties); - } - - private void validateKeys(Properties properties) { - if (requiredKeys.isEmpty()) { - loadRequiredKeys(); - } - int requiredCount = 0; - for (Map.Entry keyValuePair : properties.entrySet()) { - if (requiredKeys.contains(keyValuePair.getKey().toString())) { - requiredCount++; - } - else { - throw new IllegalStateException("Unauthorized key " + keyValuePair.getKey().toString()); - } - } - if (requiredCount < requiredKeys.size()) { - for (String key : requiredKeys) { - if (!properties.containsKey(key)) { - throw new IllegalStateException("Missing required key " + key); - } - } - } - } - - private void validateValues(Properties properties) { - } - - private void loadRequiredKeys() { - // HDFS - requiredKeys.add("hdfsPath"); - requiredKeys.add("hdfsuri"); - requiredKeys.add("dfs.client.use.datanode.hostname"); - requiredKeys.add("dfs.data.transfer.protection"); - 
requiredKeys.add("dfs.encrypt.data.transfer.cipher.suites"); - // Kerberos - requiredKeys.add("hadoop.security.authentication"); - requiredKeys.add("hadoop.security.authorization"); - requiredKeys.add("dfs.namenode.kerberos.principal.pattern"); - requiredKeys.add("java.security.krb5.kdc"); - requiredKeys.add("java.security.krb5.realm"); - requiredKeys.add("KerberosKeytabUser"); - requiredKeys.add("KerberosKeytabPath"); - requiredKeys.add("hadoop.kerberos.keytab.login.autorenewal.enabled"); - } - -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java deleted file mode 100644 index 8f632c2f..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. 
- */ -package com.teragrep.cfe_39.configuration; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Properties; - -public class KafkaConfiguration implements Configuration { - - private final Logger LOGGER = LoggerFactory.getLogger(KafkaConfiguration.class); - private final Properties properties; - private final KafkaConfigurationValidation configurationValidation; - - public KafkaConfiguration() { - this.properties = new Properties(); - this.configurationValidation = new KafkaConfigurationValidation(); - } - - @Override - public void load(String configurationPath) throws IOException { - Path configPath = Paths.get(configurationPath); - LOGGER.info("Loading hdfs config <[{}]>", configPath.toAbsolutePath()); - try (InputStream inputStream = Files.newInputStream(configPath)) { - properties.load(inputStream); - LOGGER.debug("Got configuration: <{}>", properties); - configurationValidation.validate(properties); - } - } - - @Override - public void with(String key, String value) { - if (this.has(key)) { - properties.setProperty(key, value); - configurationValidation.validate(properties); - } - else { - throw new IllegalArgumentException("Key not found: " + key); - } - } - - @Override - public String valueOf(String key) { - if (this.has(key)) { - return properties.getProperty(key); - } - throw new IllegalArgumentException("Key not found: " + key); - } - - @Override - public boolean has(String key) { - return properties.containsKey(key); - } -} diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationValidation.java b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationValidation.java deleted file mode 100644 index 5095399d..00000000 --- a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfigurationValidation.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * HDFS 
Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. 
- */ -package com.teragrep.cfe_39.configuration; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.HashSet; -import java.util.Map; -import java.util.Properties; -import java.util.Set; - -public class KafkaConfigurationValidation implements ConfigurationValidation { - - private final Logger LOGGER = LoggerFactory.getLogger(KafkaConfigurationValidation.class); - private final Set requiredKeys; - - public KafkaConfigurationValidation() { - this.requiredKeys = new HashSet<>(); - } - - @Override - public void validate(Properties properties) { - validateKeys(properties); - validateValues(properties); - } - - private void validateKeys(Properties properties) { - if (requiredKeys.isEmpty()) { - loadRequiredKeys(); - } - int requiredCount = 0; - for (Map.Entry keyValuePair : properties.entrySet()) { - if (requiredKeys.contains(keyValuePair.getKey().toString())) { - requiredCount++; - } - else { - throw new IllegalStateException("Unauthorized key " + keyValuePair.getKey().toString()); - } - } - if (requiredCount < requiredKeys.size()) { - for (String key : requiredKeys) { - if (!properties.containsKey(key)) { - throw new IllegalStateException("Missing required key " + key); - } - } - } - } - - private void validateValues(Properties properties) { - } - - private void loadRequiredKeys() { - // kafka - requiredKeys.add("java.security.auth.login.config"); - requiredKeys.add("bootstrap.servers"); - requiredKeys.add("auto.offset.reset"); - requiredKeys.add("enable.auto.commit"); - requiredKeys.add("group.id"); - requiredKeys.add("security.protocol"); - requiredKeys.add("sasl.mechanism"); - requiredKeys.add("max.poll.records"); - requiredKeys.add("fetch.max.bytes"); - requiredKeys.add("request.timeout.ms"); - requiredKeys.add("max.poll.interval.ms"); - requiredKeys.add("useMockKafkaConsumer"); - } - -} diff --git a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java deleted file mode 
100644 index 06d6f42c..00000000 --- a/src/test/java/com/teragrep/cfe_39/ConfigurationTest.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * HDFS Data Ingestion for PTH_06 use CFE-39 - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. 
- * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.cfe_39; - -import com.teragrep.cfe_39.configuration.ConfigurationImpl; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; - -public class ConfigurationTest { - - private final Logger LOGGER = LoggerFactory.getLogger(ConfigurationTest.class); - - @Disabled(value = "Preparing configuration refactoring") - @Test - public void kafkaPropertiesConfigurationTest() { - assertDoesNotThrow(() -> { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - ConfigurationImpl configuration = new ConfigurationImpl(); - configuration.load(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - // Test extracting useMockKafkaConsumer value from config. - boolean useMockKafkaConsumer = Boolean.parseBoolean(configuration.valueOf("useMockKafkaConsumer")); - Assertions.assertTrue(useMockKafkaConsumer); - LOGGER.debug("useMockKafkaConsumer: {}", useMockKafkaConsumer); - }); - } - - @Disabled(value = "Preparing configuration refactoring") - @Test - public void brokenConfigurationTest() { - // Set system properties to use the broken configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/broken.application.properties"); - Exception e = Assertions.assertThrows(Exception.class, () -> { - ConfigurationImpl configuration = new ConfigurationImpl(); - configuration.load(System.getProperty("user.dir") + "/src/test/resources/broken.application.properties"); - }); - Assertions.assertEquals("Missing required key numOfConsumers", e.getMessage()); - } - - @Disabled(value = "Preparing configuration refactoring") - @Test - public void configurationEqualityTest() { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - assertDoesNotThrow(() -> { - ConfigurationImpl configuration1 = new ConfigurationImpl(); - configuration1.load(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - ConfigurationImpl configuration2 = new ConfigurationImpl(); - configuration2.load(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - ConfigurationImpl configuration3 = new ConfigurationImpl(); - configuration3.load(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - ConfigurationImpl configuration4 = new ConfigurationImpl(); - configuration4.load(System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - Assertions.assertNotEquals(configuration1, configuration2); - Assertions.assertNotEquals(configuration1, configuration3); - Assertions.assertNotEquals(configuration3, configuration4); - configuration3.with("hdfsuri", "12345"); - configuration4.with("hdfsuri", "12345"); - Assertions.assertNotEquals(configuration1, configuration3); - Assertions.assertNotEquals(configuration3, configuration4); - }); - } - - @Disabled(value = "Preparing configuration refactoring") - @Test - public void configurationWithTest() { - // 
Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - assertDoesNotThrow(() -> { - ConfigurationImpl configuration1 = new ConfigurationImpl(); - configuration1 - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); - ConfigurationImpl configuration2 = new ConfigurationImpl(); - configuration2 - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); - configuration2.with("hdfsuri", "12345"); - Assertions.assertEquals(configuration1.valueOf("hdfsuri"), "hdfs://localhost:45937/"); - Assertions.assertEquals(configuration2.valueOf("hdfsuri"), "12345"); - }); - } - - @Disabled(value = "Preparing configuration refactoring") - @Test - public void configurationWithFailTest() { - // Set system properties to use the valid configuration. - System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - Exception e = Assertions.assertThrows(IllegalArgumentException.class, () -> { - ConfigurationImpl configuration = new ConfigurationImpl(); - configuration - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); - configuration.with("unauthorized_key", "12345"); - }); - Assertions.assertEquals("Key not found: unauthorized_key", e.getMessage()); - } - - @Disabled(value = "Preparing configuration refactoring") - @Test - public void configurationWithFailTest2() { - // Set system properties to use the valid configuration. 
- System - .setProperty("cfe_39.config.location", System.getProperty("user.dir") + "/src/test/resources/valid.application.properties"); - Exception e = Assertions.assertThrows(IllegalArgumentException.class, () -> { - ConfigurationImpl configuration = new ConfigurationImpl(); - configuration - .load(System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties")); - configuration.with("maximumFileSize", "0"); - }); - Assertions.assertEquals("maximumFileSize must be set to >0, got 0", e.getMessage()); - } - -} From 0dd9692321d7d3843839a9d417aa32d9d7d1fcdb Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Fri, 15 Nov 2024 14:23:45 +0200 Subject: [PATCH 70/77] Renamed NewCommonConfiguration.java to CommonConfiguration.java. Renamed NewHdfsConfiguration.java to HdfsConfiguration.java. Renamed NewKafkaConfiguration.java to KafkaConfiguration.java. Renamed related tests. --- src/main/java/com/teragrep/cfe_39/Main.java | 18 +++++++++--------- ...iguration.java => CommonConfiguration.java} | 6 +++--- ...nfiguration.java => HdfsConfiguration.java} | 6 +++--- ...figuration.java => KafkaConfiguration.java} | 6 +++--- .../consumers/kafka/BatchDistributionImpl.java | 12 ++++++------ .../kafka/ConsumerRebalanceListenerImpl.java | 6 +++--- .../consumers/kafka/FileSystemFactoryImpl.java | 11 +++++------ .../cfe_39/consumers/kafka/HDFSPrune.java | 4 ++-- .../cfe_39/consumers/kafka/HDFSRead.java | 4 ++-- .../cfe_39/consumers/kafka/HDFSWrite.java | 6 +++--- .../consumers/kafka/HdfsDataIngestion.java | 18 +++++++++--------- .../cfe_39/consumers/kafka/KafkaReader.java | 6 +++--- .../consumers/kafka/PartitionFileImpl.java | 10 +++++----- .../consumers/kafka/PartitionRecordsImpl.java | 6 +++--- .../consumers/kafka/ReadCoordinator.java | 18 +++++++++--------- .../teragrep/cfe_39/BatchDistributionTest.java | 12 ++++++------ ...nTest.java => CommonConfigurationTest.java} | 8 ++++---- ...ionTest.java => HdfsConfigurationTest.java} | 8 ++++---- 
.../java/com/teragrep/cfe_39/HdfsTest.java | 12 ++++++------ .../cfe_39/Ingestion0FilesLowSizeTest.java | 18 +++++++++--------- .../teragrep/cfe_39/Ingestion0FilesTest.java | 18 +++++++++--------- .../cfe_39/Ingestion1Old1NewFileTest.java | 18 +++++++++--------- .../cfe_39/Ingestion2NewFilesTest.java | 18 +++++++++--------- .../cfe_39/Ingestion2OldFilesTest.java | 18 +++++++++--------- .../cfe_39/IngestionConsumerTimeoutTest.java | 18 +++++++++--------- ...onTest.java => KafkaConfigurationTest.java} | 8 ++++---- .../com/teragrep/cfe_39/KafkaConsumerTest.java | 18 +++++++++--------- .../teragrep/cfe_39/ProcessingFailureTest.java | 12 ++++++------ .../teragrep/cfe_39/PruningNoFilesTest.java | 6 +++--- .../teragrep/cfe_39/PruningOneNewFileTest.java | 6 +++--- .../teragrep/cfe_39/PruningOneOldFileTest.java | 6 +++--- .../cfe_39/PruningOneOldOneNewFileTest.java | 6 +++--- .../cfe_39/PruningTwoNewFilesTest.java | 6 +++--- .../cfe_39/PruningTwoOldFilesTest.java | 6 +++--- .../teragrep/cfe_39/SyslogAvroWriterTest.java | 6 +++--- 35 files changed, 182 insertions(+), 183 deletions(-) rename src/main/java/com/teragrep/cfe_39/configuration/{NewCommonConfiguration.java => CommonConfiguration.java} (97%) rename src/main/java/com/teragrep/cfe_39/configuration/{NewHdfsConfiguration.java => HdfsConfiguration.java} (97%) rename src/main/java/com/teragrep/cfe_39/configuration/{NewKafkaConfiguration.java => KafkaConfiguration.java} (97%) rename src/test/java/com/teragrep/cfe_39/{NewCommonConfigurationTest.java => CommonConfigurationTest.java} (93%) rename src/test/java/com/teragrep/cfe_39/{NewHdfsConfigurationTest.java => HdfsConfigurationTest.java} (94%) rename src/test/java/com/teragrep/cfe_39/{NewKafkaConfigurationTest.java => KafkaConfigurationTest.java} (93%) diff --git a/src/main/java/com/teragrep/cfe_39/Main.java b/src/main/java/com/teragrep/cfe_39/Main.java index 7ec77aca..280b0c2c 100644 --- a/src/main/java/com/teragrep/cfe_39/Main.java +++ 
b/src/main/java/com/teragrep/cfe_39/Main.java @@ -45,9 +45,9 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; -import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import com.teragrep.cnf_01.ConfigurationException; import com.teragrep.cnf_01.PathConfiguration; @@ -64,7 +64,7 @@ public final class Main { private static final Logger LOGGER = LoggerFactory.getLogger(Main.class); public static void main(String[] args) throws Exception { - // NewCommonConfiguration + // CommonConfiguration final PathConfiguration pathConfiguration = new PathConfiguration( System.getProperty("cfe_39.config.location", "/opt/teragrep/cfe_39/etc/application.properties") ); @@ -76,14 +76,14 @@ public static void main(String[] args) throws Exception { LOGGER.error("Failed to create PathConfiguration: <{}>", e.getMessage()); throw e; } - NewCommonConfiguration commonConfig = new NewCommonConfiguration(map); + CommonConfiguration commonConfig = new CommonConfiguration(map); // log4j2 configuration Path log4j2Config = Paths .get(commonConfig.log4j2ConfigurationFile(), System.getProperty("user.dir") + "/rpm/resources/log4j2.properties"); Configurator.reconfigure(log4j2Config.toUri()); - // NewKafkaConfiguration + // KafkaConfiguration final PathConfiguration kafkaPathConfiguration = new PathConfiguration(commonConfig.egressConfigurationFile()); final Map kafkaMap; try { @@ -93,9 +93,9 @@ public static void main(String[] args) throws Exception { LOGGER.error("Failed to create PathConfiguration: <{}>", e.getMessage()); throw e; } - NewKafkaConfiguration kafkaConfig = new NewKafkaConfiguration(kafkaMap); + KafkaConfiguration 
kafkaConfig = new KafkaConfiguration(kafkaMap); - // NewHdfsConfiguration + // HdfsConfiguration final PathConfiguration hdfsPathConfiguration = new PathConfiguration(commonConfig.ingressConfigurationFile()); final Map hdfsMap; try { @@ -105,7 +105,7 @@ public static void main(String[] args) throws Exception { LOGGER.error("Failed to create PathConfiguration: <{}>", e.getMessage()); throw e; } - NewHdfsConfiguration hdfsConfig = new NewHdfsConfiguration(hdfsMap); + HdfsConfiguration hdfsConfig = new HdfsConfiguration(hdfsMap); LOGGER.info("Running main program"); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(commonConfig, hdfsConfig, kafkaConfig); diff --git a/src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java similarity index 97% rename from src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java rename to src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java index eace92f6..9fcdc4ea 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/NewCommonConfiguration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java @@ -51,13 +51,13 @@ import java.util.Map; -public final class NewCommonConfiguration { +public final class CommonConfiguration { - private final Logger LOGGER = LoggerFactory.getLogger(NewCommonConfiguration.class); + private final Logger LOGGER = LoggerFactory.getLogger(CommonConfiguration.class); private final Map config; - public NewCommonConfiguration(Map map) { + public CommonConfiguration(Map map) { this.config = map; } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/NewHdfsConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java similarity index 97% rename from src/main/java/com/teragrep/cfe_39/configuration/NewHdfsConfiguration.java rename to src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java 
index a522df18..c3edc5c6 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/NewHdfsConfiguration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java @@ -51,13 +51,13 @@ import java.util.Map; -public final class NewHdfsConfiguration { +public final class HdfsConfiguration { - private final Logger LOGGER = LoggerFactory.getLogger(NewHdfsConfiguration.class); + private final Logger LOGGER = LoggerFactory.getLogger(HdfsConfiguration.class); private final Map config; - public NewHdfsConfiguration(Map config) { + public HdfsConfiguration(Map config) { this.config = config; } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/NewKafkaConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java similarity index 97% rename from src/main/java/com/teragrep/cfe_39/configuration/NewKafkaConfiguration.java rename to src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java index 992da3cb..2db13e10 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/NewKafkaConfiguration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java @@ -51,13 +51,13 @@ import java.util.Map; -public final class NewKafkaConfiguration { +public final class KafkaConfiguration { - private final Logger LOGGER = LoggerFactory.getLogger(NewKafkaConfiguration.class); + private final Logger LOGGER = LoggerFactory.getLogger(KafkaConfiguration.class); private final Map config; - public NewKafkaConfiguration(Map config) { + public KafkaConfiguration(Map config) { this.config = config; } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index 9c8ea094..9ea47c3a 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -46,8 +46,8 @@ package 
com.teragrep.cfe_39.consumers.kafka; import com.google.gson.*; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import com.teragrep.cfe_39.metrics.DurationStatistics; import org.slf4j.Logger; @@ -70,13 +70,13 @@ public final class BatchDistributionImpl implements BatchDistribution { private final DurationStatistics durationStatistics; private final TopicCounter topicCounter; private long lastTimeCalled; - private final NewCommonConfiguration config; - private final NewHdfsConfiguration hdfsConfig; + private final CommonConfiguration config; + private final HdfsConfiguration hdfsConfig; private final Map partitionFileMap; public BatchDistributionImpl( - NewCommonConfiguration config, - NewHdfsConfiguration hdfsConfig, + CommonConfiguration config, + HdfsConfiguration hdfsConfig, String topic, DurationStatistics durationStatistics, TopicCounter topicCounter diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java index 5fb29962..2934bbe2 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ConsumerRebalanceListenerImpl.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import org.apache.hadoop.fs.FileSystem; import org.apache.kafka.clients.consumer.Consumer; import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; @@ -64,12 +64,12 @@ public final class ConsumerRebalanceListenerImpl implements ConsumerRebalanceLis private final 
Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; - private final NewHdfsConfiguration config; + private final HdfsConfiguration config; public ConsumerRebalanceListenerImpl( Consumer kafkaConsumer, BatchDistributionImpl callbackFunction, - NewHdfsConfiguration config + HdfsConfiguration config ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java index d6eaaf62..22419c87 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/FileSystemFactoryImpl.java @@ -45,11 +45,10 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.security.UserGroupInformation; import java.io.IOException; @@ -57,11 +56,11 @@ public final class FileSystemFactoryImpl implements FileSystemFactory { - private final HdfsConfiguration conf; - private final NewHdfsConfiguration configuration; + private final org.apache.hadoop.hdfs.HdfsConfiguration conf; + private final HdfsConfiguration configuration; - public FileSystemFactoryImpl(NewHdfsConfiguration configuration) { - this.conf = new HdfsConfiguration(); + public FileSystemFactoryImpl(HdfsConfiguration configuration) { + this.conf = new org.apache.hadoop.hdfs.HdfsConfiguration(); this.configuration = configuration; } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java index a58c8c83..137a6803 100644 --- 
a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSPrune.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -61,7 +61,7 @@ public final class HDFSPrune { private final Path newDirectoryPath; private final long cutOffEpoch; - public HDFSPrune(NewHdfsConfiguration config, String topicName, FileSystem fs) throws IOException { + public HDFSPrune(HdfsConfiguration config, String topicName, FileSystem fs) throws IOException { this.fs = fs; String path = config.hdfsPath().concat("/").concat(topicName); //==== Create directory if not exists diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java index e289a6cc..616ec120 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSRead.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import org.apache.hadoop.fs.*; import org.apache.kafka.common.TopicPartition; import org.slf4j.Logger; @@ -64,7 +64,7 @@ The offset map can then be used for kafka consumer seek() method, which will add private final FileSystem fs; private final String path; - public HDFSRead(NewHdfsConfiguration config, FileSystem fs) throws IOException { + public HDFSRead(HdfsConfiguration config, FileSystem fs) throws IOException { this.fs = fs; path = config.hdfsPath(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java index 
105e7ed5..0475ca22 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HDFSWrite.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import org.apache.hadoop.fs.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,9 +58,9 @@ public final class HDFSWrite implements AutoCloseable { private static final Logger LOGGER = LoggerFactory.getLogger(HDFSWrite.class); private final String fileName; private final String path; - private final NewHdfsConfiguration configuration; + private final HdfsConfiguration configuration; - public HDFSWrite(NewHdfsConfiguration config, String topic, String partition, long offset) { + public HDFSWrite(HdfsConfiguration config, String topic, String partition, long offset) { this.configuration = config; path = config.hdfsPath() + "/" + topic; fileName = partition + "." + offset; // filename should be constructed from partition and offset. 
diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index 810d582c..4efa7b71 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -45,9 +45,9 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; -import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import com.teragrep.cfe_39.metrics.*; import com.teragrep.cfe_39.metrics.topic.TopicCounter; import org.apache.hadoop.fs.FileSystem; @@ -71,9 +71,9 @@ public final class HdfsDataIngestion { private static final Logger LOGGER = LoggerFactory.getLogger(HdfsDataIngestion.class); - private final NewCommonConfiguration config; - private final NewHdfsConfiguration hdfsConfig; - private final NewKafkaConfiguration kafkaConfig; + private final CommonConfiguration config; + private final HdfsConfiguration hdfsConfig; + private final KafkaConfiguration kafkaConfig; private final org.apache.kafka.clients.consumer.Consumer kafkaConsumer; private final List threads = new ArrayList<>(); private final Set activeTopics = new HashSet<>(); @@ -82,9 +82,9 @@ public final class HdfsDataIngestion { private final Map hdfsStartOffsets; public HdfsDataIngestion( - NewCommonConfiguration config, - NewHdfsConfiguration hdfsConfiguration, - NewKafkaConfiguration kafkaConfiguration + CommonConfiguration config, + HdfsConfiguration hdfsConfiguration, + KafkaConfiguration kafkaConfiguration ) throws IOException { this.config = config; this.hdfsConfig = hdfsConfiguration; diff --git 
a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java index ab0f0477..c49453d2 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaReader.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; import org.apache.kafka.clients.consumer.*; import org.slf4j.Logger; @@ -59,7 +59,7 @@ public final class KafkaReader implements AutoCloseable { private final Logger LOGGER = LoggerFactory.getLogger(KafkaReader.class); - private final NewCommonConfiguration config; + private final CommonConfiguration config; private final Consumer kafkaConsumer; private final BatchDistributionImpl callbackFunction; private final ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl; @@ -69,7 +69,7 @@ public KafkaReader( Consumer kafkaConsumer, BatchDistributionImpl callbackFunction, ConsumerRebalanceListenerImpl consumerRebalanceListenerImpl, - NewCommonConfiguration config + CommonConfiguration config ) { this.kafkaConsumer = kafkaConsumer; this.callbackFunction = callbackFunction; diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 5ea2adae..91881653 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -47,8 +47,8 @@ import com.google.gson.JsonObject; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; 
import com.teragrep.cfe_39.consumers.kafka.queue.UniqueFileCreated; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,13 +63,13 @@ public final class PartitionFileImpl implements PartitionFile { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionFileImpl.class); private final JsonObject topicPartition; - private final NewCommonConfiguration config; - private final NewHdfsConfiguration hdfsConfig; + private final CommonConfiguration config; + private final HdfsConfiguration hdfsConfig; private final File syslogFile; private final List batchOffsets; private final PartitionRecordsImpl partitionRecords; - PartitionFileImpl(NewCommonConfiguration config, NewHdfsConfiguration hdfsConfig, JsonObject topicPartition) + PartitionFileImpl(CommonConfiguration config, HdfsConfiguration hdfsConfig, JsonObject topicPartition) throws IOException { UniqueFileCreated uniqueFileCreated = new UniqueFileCreated( config.queueDirectory(), diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java index 5bea5b60..9a78d936 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39.consumers.kafka; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; import com.teragrep.rlo_06.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,9 +59,9 @@ public final class PartitionRecordsImpl implements PartitionRecords { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionRecordsImpl.class); private final List kafkaRecordList; - private final NewCommonConfiguration config; + private final CommonConfiguration config; - public 
PartitionRecordsImpl(NewCommonConfiguration config) { + public PartitionRecordsImpl(CommonConfiguration config) { this.kafkaRecordList = new ArrayList<>(); this.config = config; } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java index c241af87..b6ca987d 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/ReadCoordinator.java @@ -45,9 +45,9 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; -import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.serialization.ByteArrayDeserializer; @@ -61,17 +61,17 @@ public final class ReadCoordinator implements Runnable { private static final Logger LOGGER = LoggerFactory.getLogger(ReadCoordinator.class); private final String queueTopic; - private final NewCommonConfiguration config; - private final NewHdfsConfiguration hdfsConfig; - private final NewKafkaConfiguration kafkaConfig; + private final CommonConfiguration config; + private final HdfsConfiguration hdfsConfig; + private final KafkaConfiguration kafkaConfig; private final BatchDistributionImpl callbackFunction; private final Map hdfsStartOffsets; public ReadCoordinator( String queueTopic, - NewCommonConfiguration config, - NewKafkaConfiguration kafkaConfig, - NewHdfsConfiguration hdfsConfig, + CommonConfiguration config, + KafkaConfiguration kafkaConfig, + HdfsConfiguration hdfsConfig, BatchDistributionImpl callbackFunction, Map 
hdfsStartOffsets ) { diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 48736d22..9b1bb3b3 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -46,8 +46,8 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; @@ -85,8 +85,8 @@ public class BatchDistributionTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewCommonConfiguration config; - private static NewHdfsConfiguration hdfsConfig; + private static CommonConfiguration config; + private static HdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. 
@@ -105,7 +105,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "600000"); - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); @@ -125,7 +125,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); } diff --git a/src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java similarity index 93% rename from src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java rename to src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java index 9d6488c6..87d8a201 100644 --- a/src/test/java/com/teragrep/cfe_39/NewCommonConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; import com.teragrep.cnf_01.PathConfiguration; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -56,9 +56,9 @@ import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -public class NewCommonConfigurationTest { +public class CommonConfigurationTest { - private final Logger LOGGER = LoggerFactory.getLogger(NewCommonConfigurationTest.class); + private final Logger LOGGER = LoggerFactory.getLogger(CommonConfigurationTest.class); @Test public void configurationTest() { @@ -73,7 +73,7 @@ public void 
configurationTest() { "{numOfConsumers=2, queueTopicPattern=^testConsumerTopic-*$, queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/, skipNonRFC5424Records=true, maximumFileSize=3000, skipEmptyRFC5424Records=true, consumerTimeout=600000}", map.toString() ); - NewCommonConfiguration commonConfig = new NewCommonConfiguration(map); + CommonConfiguration commonConfig = new CommonConfiguration(map); // Assert that printers return correct values. Assertions diff --git a/src/test/java/com/teragrep/cfe_39/NewHdfsConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/HdfsConfigurationTest.java similarity index 94% rename from src/test/java/com/teragrep/cfe_39/NewHdfsConfigurationTest.java rename to src/test/java/com/teragrep/cfe_39/HdfsConfigurationTest.java index 223ccfbd..ff526450 100644 --- a/src/test/java/com/teragrep/cfe_39/NewHdfsConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsConfigurationTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cnf_01.PathConfiguration; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -56,9 +56,9 @@ import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -public class NewHdfsConfigurationTest { +public class HdfsConfigurationTest { - private final Logger LOGGER = LoggerFactory.getLogger(NewHdfsConfigurationTest.class); + private final Logger LOGGER = LoggerFactory.getLogger(HdfsConfigurationTest.class); @Test public void configurationTest() { @@ -73,7 +73,7 @@ public void configurationTest() { "{pruneOffset=157784760000, hdfsuri=hdfs://localhost:45937/, dfs.namenode.kerberos.principal.pattern=test, hadoop.security.authentication=kerberos, dfs.encrypt.data.transfer.cipher.suites=test, java.security.krb5.kdc=test, KerberosKeytabPath=test, dfs.data.transfer.protection=test, dfs.client.use.datanode.hostname=false, 
hadoop.kerberos.keytab.login.autorenewal.enabled=true, KerberosKeytabUser=test, java.security.krb5.realm=test, hadoop.security.authorization=test, hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/}", hdfsMap.toString() ); - NewHdfsConfiguration hdfsConfig = new NewHdfsConfiguration(hdfsMap); + HdfsConfiguration hdfsConfig = new HdfsConfiguration(hdfsMap); // Assert that printers return correct values. Assertions.assertEquals(157784760000L, hdfsConfig.pruneOffset()); diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index ba038061..74ffc188 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -47,8 +47,8 @@ import com.google.gson.JsonObject; import com.google.gson.JsonParser; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSWrite; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -73,8 +73,8 @@ public class HdfsTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewCommonConfiguration config; - private static NewHdfsConfiguration hdfsConfig; + private static CommonConfiguration config; + private static HdfsConfiguration hdfsConfig; private FileSystem fs; // Start minicluster and initialize config. 
@@ -93,7 +93,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "600000"); - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); hdfsCluster = new TestMiniClusterFactory().create(baseDir); @@ -112,7 +112,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java index 8b94eebb..660fbde8 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java @@ -46,9 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; -import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -74,9 +74,9 @@ public class Ingestion0FilesLowSizeTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion0FilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - 
private static NewCommonConfiguration config; - private static NewHdfsConfiguration hdfsConfig; - private static NewKafkaConfiguration kafkaConfig; + private static CommonConfiguration config; + private static HdfsConfiguration hdfsConfig; + private static KafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. @@ -95,7 +95,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "600000"); - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); @@ -115,7 +115,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); Map kafkaMap = new HashMap<>(); @@ -131,7 +131,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); - kafkaConfig = new NewKafkaConfiguration(kafkaMap); + kafkaConfig = new KafkaConfiguration(kafkaMap); }); } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index bc0e4290..d7c11d26 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -46,9 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; -import 
com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -72,9 +72,9 @@ public class Ingestion0FilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion0FilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewCommonConfiguration config; - private static NewHdfsConfiguration hdfsConfig; - private static NewKafkaConfiguration kafkaConfig; + private static CommonConfiguration config; + private static HdfsConfiguration hdfsConfig; + private static KafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. @@ -93,7 +93,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "600000"); - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); @@ -113,7 +113,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); Map kafkaMap = new HashMap<>(); @@ -129,7 +129,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); - kafkaConfig = new NewKafkaConfiguration(kafkaMap); + 
kafkaConfig = new KafkaConfiguration(kafkaMap); }); } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index cc7c87ff..131f6089 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -46,9 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; -import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -75,9 +75,9 @@ public class Ingestion1Old1NewFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion1Old1NewFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewCommonConfiguration config; - private static NewHdfsConfiguration hdfsConfig; - private static NewKafkaConfiguration kafkaConfig; + private static CommonConfiguration config; + private static HdfsConfiguration hdfsConfig; + private static KafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. 
@@ -96,7 +96,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "600000"); - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); @@ -116,7 +116,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); Map kafkaMap = new HashMap<>(); @@ -132,7 +132,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); - kafkaConfig = new NewKafkaConfiguration(kafkaMap); + kafkaConfig = new KafkaConfiguration(kafkaMap); // Inserts pre-made avro-files to HDFS where one file has new timestamp and other old, which are normally generated during data ingestion from mock kafka consumer. 
String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index 71f88ff5..166bbbbb 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -46,9 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; -import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -73,9 +73,9 @@ public class Ingestion2NewFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion2NewFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewCommonConfiguration config; - private static NewHdfsConfiguration hdfsConfig; - private static NewKafkaConfiguration kafkaConfig; + private static CommonConfiguration config; + private static HdfsConfiguration hdfsConfig; + private static KafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. 
@@ -94,7 +94,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "600000"); - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); @@ -114,7 +114,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); Map kafkaMap = new HashMap<>(); @@ -130,7 +130,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); - kafkaConfig = new NewKafkaConfiguration(kafkaMap); + kafkaConfig = new KafkaConfiguration(kafkaMap); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. 
String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index b6cca4bf..fd78e1db 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -46,9 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; -import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileReader; import org.apache.avro.io.DatumReader; @@ -75,9 +75,9 @@ public class Ingestion2OldFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(Ingestion2OldFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewCommonConfiguration config; - private static NewHdfsConfiguration hdfsConfig; - private static NewKafkaConfiguration kafkaConfig; + private static CommonConfiguration config; + private static HdfsConfiguration hdfsConfig; + private static KafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. 
@@ -96,7 +96,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "600000"); - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); @@ -116,7 +116,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); Map kafkaMap = new HashMap<>(); @@ -132,7 +132,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); - kafkaConfig = new NewKafkaConfiguration(kafkaMap); + kafkaConfig = new KafkaConfiguration(kafkaMap); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. 
String path = hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"; // "hdfs:///opt/teragrep/cfe_39/srv/testConsumerTopic" diff --git a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java index c60e0ba1..5a4daa4e 100644 --- a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java +++ b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java @@ -46,9 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; -import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.HdfsDataIngestion; import org.apache.avro.file.DataFileStream; import org.apache.avro.specific.SpecificDatumReader; @@ -74,9 +74,9 @@ public class IngestionConsumerTimeoutTest { private static final Logger LOGGER = LoggerFactory.getLogger(IngestionConsumerTimeoutTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewCommonConfiguration config; - private static NewHdfsConfiguration hdfsConfig; - private static NewKafkaConfiguration kafkaConfig; + private static CommonConfiguration config; + private static HdfsConfiguration hdfsConfig; + private static KafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. 
@@ -95,7 +95,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "1000"); // Low consumerTimeout - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); @@ -115,7 +115,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); Map kafkaMap = new HashMap<>(); @@ -131,7 +131,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); - kafkaConfig = new NewKafkaConfiguration(kafkaMap); + kafkaConfig = new KafkaConfiguration(kafkaMap); }); } diff --git a/src/test/java/com/teragrep/cfe_39/NewKafkaConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConfigurationTest.java similarity index 93% rename from src/test/java/com/teragrep/cfe_39/NewKafkaConfigurationTest.java rename to src/test/java/com/teragrep/cfe_39/KafkaConfigurationTest.java index a1c09fcd..64a82805 100644 --- a/src/test/java/com/teragrep/cfe_39/NewKafkaConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConfigurationTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import com.teragrep.cnf_01.PathConfiguration; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -56,9 +56,9 @@ import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -public 
class NewKafkaConfigurationTest { +public class KafkaConfigurationTest { - private final Logger LOGGER = LoggerFactory.getLogger(NewKafkaConfigurationTest.class); + private final Logger LOGGER = LoggerFactory.getLogger(KafkaConfigurationTest.class); @Test public void configurationTest() { @@ -73,7 +73,7 @@ public void configurationTest() { "{java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas, security.protocol=SASL_PLAINTEXT, useMockKafkaConsumer=true, enable.auto.commit=false, max.poll.records=500, request.timeout.ms=300000, sasl.mechanism=PLAIN, group.id=cfe_39, bootstrap.servers=test, fetch.max.bytes=1073741820, max.poll.interval.ms=300000, auto.offset.reset=earliest}", kafkaMap.toString() ); - NewKafkaConfiguration kafkaConfig = new NewKafkaConfiguration(kafkaMap); + KafkaConfiguration kafkaConfig = new KafkaConfiguration(kafkaMap); // Assert that printers return correct values. Assertions.assertEquals("/opt/teragrep/cfe_39/etc/config.jaas", kafkaConfig.javaSecurityAuthLoginConfig()); diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index e3e2e143..ab6c4fe4 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -46,9 +46,9 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; -import com.teragrep.cfe_39.configuration.NewKafkaConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.configuration.KafkaConfiguration; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.ReadCoordinator; import com.teragrep.cfe_39.metrics.DurationStatistics; @@ -76,9 +76,9 @@ public class 
KafkaConsumerTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewCommonConfiguration config; - private static NewHdfsConfiguration hdfsConfig; - private static NewKafkaConfiguration kafkaConfig; + private static CommonConfiguration config; + private static HdfsConfiguration hdfsConfig; + private static KafkaConfiguration kafkaConfig; private FileSystem fs; // Prepares known state for testing. @@ -97,7 +97,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "600000"); - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); @@ -117,7 +117,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); Map kafkaMap = new HashMap<>(); @@ -133,7 +133,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); - kafkaConfig = new NewKafkaConfiguration(kafkaMap); + kafkaConfig = new KafkaConfiguration(kafkaMap); }); } diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 3223ef4d..43876fb4 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -45,8 +45,8 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; -import 
com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.BatchDistributionImpl; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.metrics.DurationStatistics; @@ -81,8 +81,8 @@ public class ProcessingFailureTest { private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewCommonConfiguration config; - private static NewHdfsConfiguration hdfsConfig; + private static CommonConfiguration config; + private static HdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. @@ -101,7 +101,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "false"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "600000"); - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); // Create a HDFS miniCluster baseDir = Files.createTempDirectory("test_hdfs").toFile().getAbsoluteFile(); @@ -121,7 +121,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); } diff --git a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java index d4e31ce3..0be4f079 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import 
com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -70,7 +70,7 @@ public class PruningNoFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningNoFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewHdfsConfiguration hdfsConfig; + private static HdfsConfiguration hdfsConfig; private FileSystem fs; // Start minicluster and initialize config. @@ -95,7 +95,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java index 728351c3..27a70c9d 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -74,7 +74,7 @@ public class PruningOneNewFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningOneNewFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewHdfsConfiguration hdfsConfig; + private static HdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. 
@@ -99,7 +99,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); // Inserts a single pre-made avro-file with a new timestamp to HDFS, which is normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java index ddc15401..668fbdb0 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -74,7 +74,7 @@ public class PruningOneOldFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningOneOldFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewHdfsConfiguration hdfsConfig; + private static HdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. 
@@ -99,7 +99,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); // Inserts a single pre-made avro-file with an olf timestamp to HDFS, which is normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java index c96a5ac1..c21e1b88 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -74,7 +74,7 @@ public class PruningOneOldOneNewFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningOneOldOneNewFileTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewHdfsConfiguration hdfsConfig; + private static HdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. 
@@ -100,7 +100,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); /* Inserts pre-made avro-files to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java index 78b1e88d..b551fe06 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -74,7 +74,7 @@ public class PruningTwoNewFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningTwoNewFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewHdfsConfiguration hdfsConfig; + private static HdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. 
@@ -99,7 +99,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java index b1d247c1..6b9195a1 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java @@ -45,7 +45,7 @@ */ package com.teragrep.cfe_39; -import com.teragrep.cfe_39.configuration.NewHdfsConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.HDFSPrune; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -74,7 +74,7 @@ public class PruningTwoOldFilesTest { private static final Logger LOGGER = LoggerFactory.getLogger(PruningTwoOldFilesTest.class); private static MiniDFSCluster hdfsCluster; private static File baseDir; - private static NewHdfsConfiguration hdfsConfig; + private static HdfsConfiguration hdfsConfig; private FileSystem fs; // Prepares known state for testing. 
@@ -99,7 +99,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); - hdfsConfig = new NewHdfsConfiguration(hdfsMap); + hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. diff --git a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java index a141c743..2a386866 100644 --- a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java +++ b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java @@ -46,7 +46,7 @@ package com.teragrep.cfe_39; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.NewCommonConfiguration; +import com.teragrep.cfe_39.configuration.CommonConfiguration; import com.teragrep.cfe_39.consumers.kafka.KafkaRecordImpl; import com.teragrep.cfe_39.consumers.kafka.SyslogAvroWriter; import org.apache.avro.file.DataFileReader; @@ -67,7 +67,7 @@ public class SyslogAvroWriterTest { - private static NewCommonConfiguration config; + private static CommonConfiguration config; // Prepares known state for testing. 
@BeforeEach @@ -85,7 +85,7 @@ public void startMiniCluster() { map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); map.put("consumerTimeout", "600000"); - config = new NewCommonConfiguration(map); + config = new CommonConfiguration(map); }); } From 12bc00343126c50ecaceeef57e68852284a6cd6c Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Mon, 18 Nov 2024 15:56:56 +0200 Subject: [PATCH 71/77] Implemented PartitionFileFactory.java --- .../kafka/BatchDistributionImpl.java | 30 +++++++-- .../consumers/kafka/PartitionFileFactory.java | 67 +++++++++++++++++++ 2 files changed, 90 insertions(+), 7 deletions(-) create mode 100644 src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index 9ea47c3a..f5ae5909 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -70,9 +70,8 @@ public final class BatchDistributionImpl implements BatchDistribution { private final DurationStatistics durationStatistics; private final TopicCounter topicCounter; private long lastTimeCalled; - private final CommonConfiguration config; - private final HdfsConfiguration hdfsConfig; private final Map partitionFileMap; + private final PartitionFileFactory partitionFileFactory; public BatchDistributionImpl( CommonConfiguration config, @@ -81,13 +80,30 @@ public BatchDistributionImpl( DurationStatistics durationStatistics, TopicCounter topicCounter ) { - this.config = config; - this.hdfsConfig = hdfsConfig; + this( + topic, + durationStatistics, + topicCounter, + new HashMap<>(), + Instant.now().toEpochMilli(), + new PartitionFileFactory(config, hdfsConfig) + ); + } + + public BatchDistributionImpl( + String topic, + DurationStatistics 
durationStatistics, + TopicCounter topicCounter, + Map partitionFileMap, + long lastTimeCalled, + PartitionFileFactory partitionFileFactory + ) { this.topic = topic; this.durationStatistics = durationStatistics; this.topicCounter = topicCounter; - this.partitionFileMap = new HashMap<>(); - this.lastTimeCalled = Instant.now().toEpochMilli(); + this.partitionFileMap = partitionFileMap; + this.lastTimeCalled = lastTimeCalled; + this.partitionFileFactory = partitionFileFactory; } /* Input parameter is a batch of RecordOffsetObjects from kafka. Each object contains a record and its metadata (topic, partition and offset). @@ -118,7 +134,7 @@ public void accept(List batch) { if (!partitionFileMap.containsKey(recordOffset.get("partition").getAsString())) { try { partitionFileMap - .put(recordOffset.get("partition").getAsString(), new PartitionFileImpl(config, hdfsConfig, recordOffset)); + .put(recordOffset.get("partition").getAsString(), partitionFileFactory.partitionFor(recordOffset)); } catch (IOException e) { LOGGER.error("Failed to create new PartitionFileImpl for record <{}>", recordOffset); diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java new file mode 100644 index 00000000..a65cf79c --- /dev/null +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java @@ -0,0 +1,67 @@ +/* + * HDFS Data Ingestion for PTH_06 use CFE-39 + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.cfe_39.consumers.kafka; + +import com.google.gson.JsonObject; +import com.teragrep.cfe_39.configuration.CommonConfiguration; +import com.teragrep.cfe_39.configuration.HdfsConfiguration; + +import java.io.IOException; + +public final class PartitionFileFactory { + + private final CommonConfiguration config; + private final HdfsConfiguration hdfsConfig; + + PartitionFileFactory(CommonConfiguration config, HdfsConfiguration hdfsConfig) { + this.config = config; + this.hdfsConfig = hdfsConfig; + } + + public PartitionFileImpl partitionFor(JsonObject recordOffset) throws IOException { + return new PartitionFileImpl(config, hdfsConfig, recordOffset); + } +} From 062ddcc4c6223d8ba24c2492183ed948152576e4 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 19 Nov 2024 10:43:11 +0200 Subject: [PATCH 72/77] Improved PartitionFileImpl constructor by adding secondary constructor and moving all logic to PartitionFileFactory. --- .../consumers/kafka/PartitionFileFactory.java | 13 ++++++- .../consumers/kafka/PartitionFileImpl.java | 38 ++++++++++--------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java index a65cf79c..28c2afa9 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java @@ -48,6 +48,7 @@ import com.google.gson.JsonObject; import com.teragrep.cfe_39.configuration.CommonConfiguration; import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import com.teragrep.cfe_39.consumers.kafka.queue.UniqueFileCreated; import java.io.IOException; @@ -62,6 +63,16 @@ public final class PartitionFileFactory { } public PartitionFileImpl partitionFor(JsonObject recordOffset) throws IOException { - return new PartitionFileImpl(config, hdfsConfig, recordOffset); + 
UniqueFileCreated uniqueFileCreated = new UniqueFileCreated( + config.queueDirectory(), + recordOffset.get("topic").getAsString() + recordOffset.get("partition").getAsString() + ); + return new PartitionFileImpl( + uniqueFileCreated.getNextWritableFile(), + config, + hdfsConfig, + recordOffset, + new PartitionRecordsImpl(config) + ); } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index 91881653..f9a88d7a 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -49,7 +49,6 @@ import com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.CommonConfiguration; import com.teragrep.cfe_39.configuration.HdfsConfiguration; -import com.teragrep.cfe_39.consumers.kafka.queue.UniqueFileCreated; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -69,25 +68,30 @@ public final class PartitionFileImpl implements PartitionFile { private final List batchOffsets; private final PartitionRecordsImpl partitionRecords; - PartitionFileImpl(CommonConfiguration config, HdfsConfiguration hdfsConfig, JsonObject topicPartition) - throws IOException { - UniqueFileCreated uniqueFileCreated = new UniqueFileCreated( - config.queueDirectory(), - topicPartition.get("topic").getAsString() + topicPartition.get("partition").getAsString() - ); - this.syslogFile = uniqueFileCreated.getNextWritableFile(); + PartitionFileImpl( + File file, + CommonConfiguration config, + HdfsConfiguration hdfsConfig, + JsonObject topicPartition, + PartitionRecordsImpl partitionRecords + ) { + this(file, config, hdfsConfig, topicPartition, new ArrayList<>(), partitionRecords); + } + + PartitionFileImpl( + File syslogFile, + CommonConfiguration config, + HdfsConfiguration hdfsConfig, + JsonObject topicPartition, + List batchOffsets, + PartitionRecordsImpl 
partitionRecords + ) { + this.syslogFile = syslogFile; this.config = config; this.hdfsConfig = hdfsConfig; this.topicPartition = topicPartition; - this.batchOffsets = new ArrayList<>(); - this.partitionRecords = new PartitionRecordsImpl(config); - if (LOGGER.isDebugEnabled()) { - LOGGER - .debug( - "PartitionFileImpl representing topic {} partition {} initialized successfully. syslogFile path allocated to the object is {}", - topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), syslogFile.getPath() - ); - } + this.batchOffsets = batchOffsets; + this.partitionRecords = partitionRecords; } @Override From b6fe5dc1c322634dbd0805967c1b547d026b4594 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Tue, 19 Nov 2024 11:13:37 +0200 Subject: [PATCH 73/77] Moved maximumFileSize from common configuration parameters to hdfs configuration, reducing encapsulation in PartitionFileImpl. Refactored all tests according to the change. --- rpm/resources/application.properties | 2 -- rpm/resources/egress.properties | 4 ++- .../configuration/CommonConfiguration.java | 25 ------------------- .../configuration/HdfsConfiguration.java | 25 +++++++++++++++++++ .../consumers/kafka/PartitionFileFactory.java | 1 - .../consumers/kafka/PartitionFileImpl.java | 9 ++----- .../cfe_39/BatchDistributionTest.java | 2 +- .../cfe_39/CommonConfigurationTest.java | 3 +-- .../cfe_39/HdfsConfigurationTest.java | 3 ++- .../java/com/teragrep/cfe_39/HdfsTest.java | 2 +- .../cfe_39/Ingestion0FilesLowSizeTest.java | 2 +- .../teragrep/cfe_39/Ingestion0FilesTest.java | 2 +- .../cfe_39/Ingestion1Old1NewFileTest.java | 2 +- .../cfe_39/Ingestion2NewFilesTest.java | 2 +- .../cfe_39/Ingestion2OldFilesTest.java | 2 +- .../cfe_39/IngestionConsumerTimeoutTest.java | 4 +-- .../teragrep/cfe_39/KafkaConsumerTest.java | 2 +- .../cfe_39/ProcessingFailureTest.java | 2 +- .../teragrep/cfe_39/PruningNoFilesTest.java | 1 + .../cfe_39/PruningOneNewFileTest.java | 1 + 
.../cfe_39/PruningOneOldFileTest.java | 1 + .../cfe_39/PruningOneOldOneNewFileTest.java | 1 + .../cfe_39/PruningTwoNewFilesTest.java | 1 + .../cfe_39/PruningTwoOldFilesTest.java | 1 + .../teragrep/cfe_39/SyslogAvroWriterTest.java | 1 - .../resources/broken.application.properties | 2 -- .../failProcessing.application.properties | 2 -- .../largeFile.application.properties | 2 -- .../resources/valid.application.properties | 2 -- src/test/resources/valid.hdfs.properties | 4 ++- 30 files changed, 53 insertions(+), 60 deletions(-) diff --git a/rpm/resources/application.properties b/rpm/resources/application.properties index 9db46c63..aa741f0e 100644 --- a/rpm/resources/application.properties +++ b/rpm/resources/application.properties @@ -10,8 +10,6 @@ queueTopicPattern=^testConsumerTopic-*$ numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ -# The maximum file size for AVRO-files that are to be stored in HDFS database. -maximumFileSize=3000 # Boolean for deciding if records not in RFC5424 should be skipped or not. skipNonRFC5424Records=true # Boolean for deciding if empty RFC5424 records should be skipped or not. diff --git a/rpm/resources/egress.properties b/rpm/resources/egress.properties index 864b019a..75b76de3 100644 --- a/rpm/resources/egress.properties +++ b/rpm/resources/egress.properties @@ -15,4 +15,6 @@ KerberosKeytabPath=test dfs.client.use.datanode.hostname=false hadoop.kerberos.keytab.login.autorenewal.enabled=true dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test \ No newline at end of file +dfs.encrypt.data.transfer.cipher.suites=test +# The maximum file size for AVRO-files that are to be stored in HDFS database. 
+maximumFileSize=3000 \ No newline at end of file diff --git a/src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java index 9fcdc4ea..9fa98d82 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java @@ -111,31 +111,6 @@ public String queueDirectory() { return config.getOrDefault("queueDirectory", System.getProperty("user.dir") + "/rpm/resources/queue"); } - public long maximumFileSize() { - final String numString = config.get("maximumFileSize"); - if (numString == null) { - throw new ConfigurationException("Configuration error. must be set."); - } - else { - final long maximumFileSize; - try { - maximumFileSize = Long.parseLong(numString); - } - catch (NumberFormatException e) { - LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); - throw new RuntimeException(e); - } - if (maximumFileSize <= 0) { - throw new ConfigurationException( - "Configuration error. must be a positive long value." - ); - } - else { - return maximumFileSize; - } - } - } - public boolean skipNonRFC5424Records() { final String skipString = config.get("skipNonRFC5424Records"); if (skipString == null) { diff --git a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java index c3edc5c6..2d1aeb1c 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/HdfsConfiguration.java @@ -223,4 +223,29 @@ public String dfsEncryptDataTransferCipherSuites() { } } + public long maximumFileSize() { + final String numString = config.get("maximumFileSize"); + if (numString == null) { + throw new ConfigurationException("Configuration error. 
must be set."); + } + else { + final long maximumFileSize; + try { + maximumFileSize = Long.parseLong(numString); + } + catch (NumberFormatException e) { + LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); + throw new RuntimeException(e); + } + if (maximumFileSize <= 0) { + throw new ConfigurationException( + "Configuration error. must be a positive long value." + ); + } + else { + return maximumFileSize; + } + } + } + } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java index 28c2afa9..c0ead5b7 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java @@ -69,7 +69,6 @@ public PartitionFileImpl partitionFor(JsonObject recordOffset) throws IOExceptio ); return new PartitionFileImpl( uniqueFileCreated.getNextWritableFile(), - config, hdfsConfig, recordOffset, new PartitionRecordsImpl(config) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index f9a88d7a..f32790c4 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -47,7 +47,6 @@ import com.google.gson.JsonObject; import com.teragrep.cfe_39.avro.SyslogRecord; -import com.teragrep.cfe_39.configuration.CommonConfiguration; import com.teragrep.cfe_39.configuration.HdfsConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,7 +61,6 @@ public final class PartitionFileImpl implements PartitionFile { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionFileImpl.class); private final JsonObject topicPartition; - private final CommonConfiguration config; private final HdfsConfiguration hdfsConfig; private 
final File syslogFile; private final List batchOffsets; @@ -70,24 +68,21 @@ public final class PartitionFileImpl implements PartitionFile { PartitionFileImpl( File file, - CommonConfiguration config, HdfsConfiguration hdfsConfig, JsonObject topicPartition, PartitionRecordsImpl partitionRecords ) { - this(file, config, hdfsConfig, topicPartition, new ArrayList<>(), partitionRecords); + this(file, hdfsConfig, topicPartition, new ArrayList<>(), partitionRecords); } PartitionFileImpl( File syslogFile, - CommonConfiguration config, HdfsConfiguration hdfsConfig, JsonObject topicPartition, List batchOffsets, PartitionRecordsImpl partitionRecords ) { this.syslogFile = syslogFile; - this.config = config; this.hdfsConfig = hdfsConfig; this.topicPartition = topicPartition; this.batchOffsets = batchOffsets; @@ -111,7 +106,7 @@ public void commitRecords() throws IOException { storedOffset = next.getOffset(); } // When the file size has gone above the maximum, commit the file into HDFS using the latest topic/partition/offset values as the filename and then delete the local avro-file. 
- if (config.maximumFileSize() < syslogFile.length()) { + if (hdfsConfig.maximumFileSize() < syslogFile.length()) { writeToHdfs(storedOffset); } } diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index 9b1bb3b3..b1ac8bcb 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -98,7 +98,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - map.put("maximumFileSize", "3000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); @@ -125,6 +124,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); diff --git a/src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java index 87d8a201..25a6e7ef 100644 --- a/src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java @@ -70,7 +70,7 @@ public void configurationTest() { map = pathConfiguration.asMap(); Assertions .assertEquals( - "{numOfConsumers=2, queueTopicPattern=^testConsumerTopic-*$, queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/, skipNonRFC5424Records=true, maximumFileSize=3000, skipEmptyRFC5424Records=true, consumerTimeout=600000}", + "{numOfConsumers=2, 
queueTopicPattern=^testConsumerTopic-*$, queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/, skipNonRFC5424Records=true, skipEmptyRFC5424Records=true, consumerTimeout=600000}", map.toString() ); CommonConfiguration commonConfig = new CommonConfiguration(map); @@ -83,7 +83,6 @@ public void configurationTest() { Assertions .assertEquals(System.getProperty("user.dir") + "/rpm/resources/log4j2.properties", commonConfig.log4j2ConfigurationFile()); Assertions.assertEquals(2, commonConfig.numOfConsumers()); - Assertions.assertEquals(3000, commonConfig.maximumFileSize()); Assertions.assertEquals(600000, commonConfig.consumerTimeout()); Assertions.assertTrue(commonConfig.skipNonRFC5424Records()); Assertions.assertTrue(commonConfig.skipEmptyRFC5424Records()); diff --git a/src/test/java/com/teragrep/cfe_39/HdfsConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/HdfsConfigurationTest.java index ff526450..560ca63d 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsConfigurationTest.java @@ -70,7 +70,7 @@ public void configurationTest() { hdfsMap = hdfsPathConfiguration.asMap(); Assertions .assertEquals( - "{pruneOffset=157784760000, hdfsuri=hdfs://localhost:45937/, dfs.namenode.kerberos.principal.pattern=test, hadoop.security.authentication=kerberos, dfs.encrypt.data.transfer.cipher.suites=test, java.security.krb5.kdc=test, KerberosKeytabPath=test, dfs.data.transfer.protection=test, dfs.client.use.datanode.hostname=false, hadoop.kerberos.keytab.login.autorenewal.enabled=true, KerberosKeytabUser=test, java.security.krb5.realm=test, hadoop.security.authorization=test, hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/}", + "{pruneOffset=157784760000, hdfsuri=hdfs://localhost:45937/, dfs.namenode.kerberos.principal.pattern=test, hadoop.security.authentication=kerberos, dfs.encrypt.data.transfer.cipher.suites=test, java.security.krb5.kdc=test, maximumFileSize=3000, KerberosKeytabPath=test, 
dfs.data.transfer.protection=test, dfs.client.use.datanode.hostname=false, hadoop.kerberos.keytab.login.autorenewal.enabled=true, KerberosKeytabUser=test, java.security.krb5.realm=test, hadoop.security.authorization=test, hdfsPath=hdfs:///opt/teragrep/cfe_39/srv/}", hdfsMap.toString() ); HdfsConfiguration hdfsConfig = new HdfsConfiguration(hdfsMap); @@ -90,6 +90,7 @@ public void configurationTest() { Assertions.assertEquals("true", hdfsConfig.hadoopKerberosKeytabLoginAutorenewalEnabled()); Assertions.assertEquals("test", hdfsConfig.dfsDataTransferProtection()); Assertions.assertEquals("test", hdfsConfig.dfsEncryptDataTransferCipherSuites()); + Assertions.assertEquals(3000, hdfsConfig.maximumFileSize()); }); } } diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index 74ffc188..7a968534 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -86,7 +86,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - map.put("maximumFileSize", "3000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); @@ -112,6 +111,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java index 
660fbde8..8cc51085 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java @@ -88,7 +88,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - map.put("maximumFileSize", "3000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); @@ -115,6 +114,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index d7c11d26..61a6795f 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -86,7 +86,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - map.put("maximumFileSize", "30000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); @@ -113,6 +112,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); 
hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "30000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index 131f6089..202a2992 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -89,7 +89,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - map.put("maximumFileSize", "30000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); @@ -116,6 +115,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "30000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index 166bbbbb..9fac1bf2 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -87,7 +87,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - 
map.put("maximumFileSize", "30000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); @@ -114,6 +113,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "30000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index fd78e1db..46683220 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -89,7 +89,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - map.put("maximumFileSize", "30000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); @@ -116,6 +115,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "30000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java index 5a4daa4e..13c5f171 100644 --- a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java +++ 
b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java @@ -88,7 +88,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - map.put("maximumFileSize", "3000000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); @@ -115,6 +114,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); @@ -155,7 +155,7 @@ public void ingestion0FilesTest() { assertDoesNotThrow(() -> { Assertions.assertTrue(hdfsConfig.pruneOffset() >= 300000L); // Fails the test if the config is not correct. 
Assertions.assertEquals(1000, config.consumerTimeout()); - Assertions.assertEquals(3000000, config.maximumFileSize()); + Assertions.assertEquals(3000000, hdfsConfig.maximumFileSize()); Assertions.assertFalse(fs.exists(new Path(hdfsConfig.hdfsPath() + "/" + "testConsumerTopic"))); HdfsDataIngestion hdfsDataIngestion = new HdfsDataIngestion(config, hdfsConfig, kafkaConfig); hdfsDataIngestion.run(); diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index ab6c4fe4..753f70b2 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -90,7 +90,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - map.put("maximumFileSize", "30000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); @@ -117,6 +116,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "30000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 43876fb4..0cada282 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -94,7 +94,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); 
map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - map.put("maximumFileSize", "3000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "false"); @@ -121,6 +120,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); diff --git a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java index 0be4f079..c83e592c 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningNoFilesTest.java @@ -95,6 +95,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); }); diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java index 27a70c9d..98cbd310 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneNewFileTest.java @@ -99,6 +99,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000"); 
hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java index 668fbdb0..256e0034 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldFileTest.java @@ -99,6 +99,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java index c21e1b88..ce131d5d 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningOneOldOneNewFileTest.java @@ -100,6 +100,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java index b551fe06..f58ea55b 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoNewFilesTest.java @@ -99,6 +99,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", 
"test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java index 6b9195a1..7987fd37 100644 --- a/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/PruningTwoOldFilesTest.java @@ -99,6 +99,7 @@ public void startMiniCluster() { hdfsMap.put("hadoop.kerberos.keytab.login.autorenewal.enabled", "true"); hdfsMap.put("dfs.data.transfer.protection", "test"); hdfsMap.put("dfs.encrypt.data.transfer.cipher.suites", "test"); + hdfsMap.put("maximumFileSize", "3000"); hdfsConfig = new HdfsConfiguration(hdfsMap); fs = new TestFileSystemFactory().create(hdfsConfig.hdfsUri()); diff --git a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java index 2a386866..05294374 100644 --- a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java +++ b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java @@ -78,7 +78,6 @@ public void startMiniCluster() { map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); - map.put("maximumFileSize", "3000"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); diff --git a/src/test/resources/broken.application.properties b/src/test/resources/broken.application.properties index 2c08db30..a0132be3 100644 --- a/src/test/resources/broken.application.properties +++ b/src/test/resources/broken.application.properties @@ -10,8 +10,6 @@ queueTopicPattern=^testConsumerTopic-*$ # 
numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ -# The maximum file size for AVRO-files that are to be stored in HDFS database. -maximumFileSize=3000 # Boolean for deciding if records not in RFC5424 should be skipped or not. skipNonRFC5424Records=true # Boolean for deciding if empty RFC5424 records should be skipped or not. diff --git a/src/test/resources/failProcessing.application.properties b/src/test/resources/failProcessing.application.properties index 2a6d8e7b..edabc04e 100644 --- a/src/test/resources/failProcessing.application.properties +++ b/src/test/resources/failProcessing.application.properties @@ -10,8 +10,6 @@ queueTopicPattern=^testConsumerTopic-*$ numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ -# The maximum file size for AVRO-files that are to be stored in HDFS database. -maximumFileSize=3000 # Boolean for deciding if records not in RFC5424 should be skipped or not. skipNonRFC5424Records=false # Boolean for deciding if empty RFC5424 records should be skipped or not. diff --git a/src/test/resources/largeFile.application.properties b/src/test/resources/largeFile.application.properties index e0bd58f4..e35c8409 100644 --- a/src/test/resources/largeFile.application.properties +++ b/src/test/resources/largeFile.application.properties @@ -10,8 +10,6 @@ queueTopicPattern=^testConsumerTopic-*$ numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ -# The maximum file size for AVRO-files that are to be stored in HDFS database. -maximumFileSize=3000000 # Boolean for deciding if records not in RFC5424 should be skipped or not. skipNonRFC5424Records=true # Boolean for deciding if empty RFC5424 records should be skipped or not. 
diff --git a/src/test/resources/valid.application.properties b/src/test/resources/valid.application.properties index acac899b..408fc7cf 100644 --- a/src/test/resources/valid.application.properties +++ b/src/test/resources/valid.application.properties @@ -10,8 +10,6 @@ queueTopicPattern=^testConsumerTopic-*$ numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ -# The maximum file size for AVRO-files that are to be stored in HDFS database. -maximumFileSize=3000 # Boolean for deciding if records not in RFC5424 should be skipped or not. skipNonRFC5424Records=true # Boolean for deciding if empty RFC5424 records should be skipped or not. diff --git a/src/test/resources/valid.hdfs.properties b/src/test/resources/valid.hdfs.properties index 864b019a..75b76de3 100644 --- a/src/test/resources/valid.hdfs.properties +++ b/src/test/resources/valid.hdfs.properties @@ -15,4 +15,6 @@ KerberosKeytabPath=test dfs.client.use.datanode.hostname=false hadoop.kerberos.keytab.login.autorenewal.enabled=true dfs.data.transfer.protection=test -dfs.encrypt.data.transfer.cipher.suites=test \ No newline at end of file +dfs.encrypt.data.transfer.cipher.suites=test +# The maximum file size for AVRO-files that are to be stored in HDFS database. +maximumFileSize=3000 \ No newline at end of file From 90af1309e0c0482de1cd5c1597eb3efa12f17e8f Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 11 Dec 2024 10:38:17 +0200 Subject: [PATCH 74/77] Fixed kafka and hdfs configuration paths. 
--- src/main/java/com/teragrep/cfe_39/Main.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/Main.java b/src/main/java/com/teragrep/cfe_39/Main.java index 280b0c2c..4ba8d170 100644 --- a/src/main/java/com/teragrep/cfe_39/Main.java +++ b/src/main/java/com/teragrep/cfe_39/Main.java @@ -84,7 +84,7 @@ public static void main(String[] args) throws Exception { Configurator.reconfigure(log4j2Config.toUri()); // KafkaConfiguration - final PathConfiguration kafkaPathConfiguration = new PathConfiguration(commonConfig.egressConfigurationFile()); + final PathConfiguration kafkaPathConfiguration = new PathConfiguration(commonConfig.ingressConfigurationFile()); final Map kafkaMap; try { kafkaMap = kafkaPathConfiguration.asMap(); @@ -96,7 +96,7 @@ public static void main(String[] args) throws Exception { KafkaConfiguration kafkaConfig = new KafkaConfiguration(kafkaMap); // HdfsConfiguration - final PathConfiguration hdfsPathConfiguration = new PathConfiguration(commonConfig.ingressConfigurationFile()); + final PathConfiguration hdfsPathConfiguration = new PathConfiguration(commonConfig.egressConfigurationFile()); final Map hdfsMap; try { hdfsMap = hdfsPathConfiguration.asMap(); From fc15b0a8ae517061a3aa400e413fc14b72aa4e53 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 11 Dec 2024 11:19:52 +0200 Subject: [PATCH 75/77] Moved numOfConsumers parameter from CommonConfiguration to KafkaConfiguration. Refactored code according to the change. 
--- rpm/resources/application.properties | 2 -- rpm/resources/ingress.properties | 2 ++ .../configuration/CommonConfiguration.java | 23 ------------------- .../configuration/KafkaConfiguration.java | 23 +++++++++++++++++++ .../consumers/kafka/HdfsDataIngestion.java | 2 +- .../cfe_39/BatchDistributionTest.java | 1 - .../cfe_39/CommonConfigurationTest.java | 3 +-- .../java/com/teragrep/cfe_39/HdfsTest.java | 1 - .../cfe_39/Ingestion0FilesLowSizeTest.java | 2 +- .../teragrep/cfe_39/Ingestion0FilesTest.java | 2 +- .../cfe_39/Ingestion1Old1NewFileTest.java | 2 +- .../cfe_39/Ingestion2NewFilesTest.java | 2 +- .../cfe_39/Ingestion2OldFilesTest.java | 2 +- .../cfe_39/IngestionConsumerTimeoutTest.java | 2 +- .../cfe_39/KafkaConfigurationTest.java | 3 ++- .../teragrep/cfe_39/KafkaConsumerTest.java | 2 +- .../cfe_39/ProcessingFailureTest.java | 1 - .../teragrep/cfe_39/SyslogAvroWriterTest.java | 1 - .../resources/broken.application.properties | 2 -- .../failProcessing.application.properties | 2 -- .../largeFile.application.properties | 2 -- .../resources/valid.application.properties | 2 -- src/test/resources/valid.kafka.properties | 4 +++- 23 files changed, 39 insertions(+), 49 deletions(-) diff --git a/rpm/resources/application.properties b/rpm/resources/application.properties index aa741f0e..816fbffa 100644 --- a/rpm/resources/application.properties +++ b/rpm/resources/application.properties @@ -6,8 +6,6 @@ egress.configurationFile=/opt/teragrep/cfe_39/etc/egress.properties ingress.configurationFile=/opt/teragrep/cfe_39/etc/ingress.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ -# Number of consumers created to the consumer groups -numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # Boolean for deciding if records not in RFC5424 should be skipped or not. 
diff --git a/rpm/resources/ingress.properties b/rpm/resources/ingress.properties index b23a2775..fd123af9 100644 --- a/rpm/resources/ingress.properties +++ b/rpm/resources/ingress.properties @@ -20,3 +20,5 @@ request.timeout.ms=300000 max.poll.interval.ms=300000 # For testing only useMockKafkaConsumer=true +# Number of consumers created to the consumer groups +numOfConsumers=2 \ No newline at end of file diff --git a/src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java index 9fa98d82..4b14b22f 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/CommonConfiguration.java @@ -84,29 +84,6 @@ public String queueTopicPattern() { return config.getOrDefault("queueTopicPattern", ".*"); } - public int numOfConsumers() { - final String numString = config.get("numOfConsumers"); - if (numString == null) { - throw new ConfigurationException("Configuration error. must be set."); - } - else { - final int numOfConsumers; - try { - numOfConsumers = Integer.parseInt(numString); - } - catch (NumberFormatException e) { - LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); - throw new RuntimeException(e); - } - if (numOfConsumers <= 0) { - throw new ConfigurationException("Configuration error. 
must be a positive integer."); - } - else { - return numOfConsumers; - } - } - } - public String queueDirectory() { return config.getOrDefault("queueDirectory", System.getProperty("user.dir") + "/rpm/resources/queue"); } diff --git a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java index 2db13e10..8c27bf35 100644 --- a/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java +++ b/src/main/java/com/teragrep/cfe_39/configuration/KafkaConfiguration.java @@ -235,4 +235,27 @@ public boolean useMockKafkaConsumer() { } } + public int numOfConsumers() { + final String numString = config.get("numOfConsumers"); + if (numString == null) { + throw new ConfigurationException("Configuration error. must be set."); + } + else { + final int numOfConsumers; + try { + numOfConsumers = Integer.parseInt(numString); + } + catch (NumberFormatException e) { + LOGGER.error("Configuration error. Invalid value for : <{}>", e.getMessage()); + throw new RuntimeException(e); + } + if (numOfConsumers <= 0) { + throw new ConfigurationException("Configuration error. 
must be a positive integer."); + } + else { + return numOfConsumers; + } + } + } + } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java index 4efa7b71..19c158be 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/HdfsDataIngestion.java @@ -89,7 +89,7 @@ public HdfsDataIngestion( this.config = config; this.hdfsConfig = hdfsConfiguration; this.kafkaConfig = kafkaConfiguration; - this.numOfConsumers = config.numOfConsumers(); + this.numOfConsumers = kafkaConfig.numOfConsumers(); this.useMockKafkaConsumer = kafkaConfiguration.useMockKafkaConsumer(); if (useMockKafkaConsumer) { this.kafkaConsumer = new MockKafkaConsumerFactory(0).getConsumer(); // A consumer used only for scanning the available topics to be allocated to consumers running in different threads (thus 0 as input parameter). diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index b1ac8bcb..b8e95aca 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -99,7 +99,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); diff --git a/src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java index 25a6e7ef..56c9ff54 100644 --- a/src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java +++ 
b/src/test/java/com/teragrep/cfe_39/CommonConfigurationTest.java @@ -70,7 +70,7 @@ public void configurationTest() { map = pathConfiguration.asMap(); Assertions .assertEquals( - "{numOfConsumers=2, queueTopicPattern=^testConsumerTopic-*$, queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/, skipNonRFC5424Records=true, skipEmptyRFC5424Records=true, consumerTimeout=600000}", + "{queueTopicPattern=^testConsumerTopic-*$, queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/, skipNonRFC5424Records=true, skipEmptyRFC5424Records=true, consumerTimeout=600000}", map.toString() ); CommonConfiguration commonConfig = new CommonConfiguration(map); @@ -82,7 +82,6 @@ public void configurationTest() { .assertEquals(System.getProperty("user.dir") + "/rpm/resources/ingress.properties", commonConfig.ingressConfigurationFile()); Assertions .assertEquals(System.getProperty("user.dir") + "/rpm/resources/log4j2.properties", commonConfig.log4j2ConfigurationFile()); - Assertions.assertEquals(2, commonConfig.numOfConsumers()); Assertions.assertEquals(600000, commonConfig.consumerTimeout()); Assertions.assertTrue(commonConfig.skipNonRFC5424Records()); Assertions.assertTrue(commonConfig.skipEmptyRFC5424Records()); diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index 7a968534..1fb25ea2 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -87,7 +87,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java 
b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java index 8cc51085..31ea8183 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java @@ -89,7 +89,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); @@ -131,6 +130,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaMap.put("numOfConsumers", "2"); kafkaConfig = new KafkaConfiguration(kafkaMap); }); } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index 61a6795f..cf5fe5f8 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -87,7 +87,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); @@ -129,6 +128,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaMap.put("numOfConsumers", "2"); kafkaConfig = new KafkaConfiguration(kafkaMap); }); } diff --git 
a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index 202a2992..88e3d38b 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -90,7 +90,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); @@ -132,6 +131,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaMap.put("numOfConsumers", "2"); kafkaConfig = new KafkaConfiguration(kafkaMap); // Inserts pre-made avro-files to HDFS where one file has new timestamp and other old, which are normally generated during data ingestion from mock kafka consumer. 
diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index 9fac1bf2..c61fa313 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -88,7 +88,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); @@ -130,6 +129,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaMap.put("numOfConsumers", "2"); kafkaConfig = new KafkaConfiguration(kafkaMap); // Inserts pre-made avro-files with new timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. 
diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index 46683220..288037e0 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -90,7 +90,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); @@ -132,6 +131,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaMap.put("numOfConsumers", "2"); kafkaConfig = new KafkaConfiguration(kafkaMap); // Inserts pre-made avro-files with old timestamps to HDFS, which are normally generated during data ingestion from mock kafka consumer. 
diff --git a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java index 13c5f171..1dfd57ce 100644 --- a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java +++ b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java @@ -89,7 +89,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); @@ -131,6 +130,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaMap.put("numOfConsumers", "2"); kafkaConfig = new KafkaConfiguration(kafkaMap); }); } diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConfigurationTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConfigurationTest.java index 64a82805..6a997799 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConfigurationTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConfigurationTest.java @@ -70,7 +70,7 @@ public void configurationTest() { kafkaMap = kafkaPathConfiguration.asMap(); Assertions .assertEquals( - "{java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas, security.protocol=SASL_PLAINTEXT, useMockKafkaConsumer=true, enable.auto.commit=false, max.poll.records=500, request.timeout.ms=300000, sasl.mechanism=PLAIN, group.id=cfe_39, bootstrap.servers=test, fetch.max.bytes=1073741820, max.poll.interval.ms=300000, auto.offset.reset=earliest}", + "{java.security.auth.login.config=/opt/teragrep/cfe_39/etc/config.jaas, numOfConsumers=2, useMockKafkaConsumer=true, max.poll.records=500, request.timeout.ms=300000, 
group.id=cfe_39, bootstrap.servers=test, security.protocol=SASL_PLAINTEXT, enable.auto.commit=false, sasl.mechanism=PLAIN, fetch.max.bytes=1073741820, max.poll.interval.ms=300000, auto.offset.reset=earliest}", kafkaMap.toString() ); KafkaConfiguration kafkaConfig = new KafkaConfiguration(kafkaMap); @@ -88,6 +88,7 @@ public void configurationTest() { Assertions.assertEquals(300000, kafkaConfig.requestTimeoutMs()); Assertions.assertEquals(300000, kafkaConfig.maxPollIntervalMs()); Assertions.assertTrue(kafkaConfig.useMockKafkaConsumer()); + Assertions.assertEquals(2, kafkaConfig.numOfConsumers()); }); } diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index 753f70b2..f69eeed0 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -91,7 +91,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); @@ -133,6 +132,7 @@ public void startMiniCluster() { kafkaMap.put("request.timeout.ms", "300000"); kafkaMap.put("max.poll.interval.ms", "300000"); kafkaMap.put("useMockKafkaConsumer", "true"); + kafkaMap.put("numOfConsumers", "2"); kafkaConfig = new KafkaConfiguration(kafkaMap); }); } diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 0cada282..3cffd0b4 100644 --- a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -95,7 +95,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", 
"/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "false"); map.put("skipEmptyRFC5424Records", "false"); map.put("pruneOffset", "157784760000"); diff --git a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java index 05294374..1de10743 100644 --- a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java +++ b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java @@ -79,7 +79,6 @@ public void startMiniCluster() { map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); - map.put("numOfConsumers", "2"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); map.put("pruneOffset", "157784760000"); diff --git a/src/test/resources/broken.application.properties b/src/test/resources/broken.application.properties index a0132be3..dfadca92 100644 --- a/src/test/resources/broken.application.properties +++ b/src/test/resources/broken.application.properties @@ -6,8 +6,6 @@ #ingress.configurationFile=/opt/teragrep/cfe_39/etc/ingress.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ -# Number of consumers created to the consumer groups -# numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # Boolean for deciding if records not in RFC5424 should be skipped or not. 
diff --git a/src/test/resources/failProcessing.application.properties b/src/test/resources/failProcessing.application.properties index edabc04e..467ecb33 100644 --- a/src/test/resources/failProcessing.application.properties +++ b/src/test/resources/failProcessing.application.properties @@ -6,8 +6,6 @@ #ingress.configurationFile=/opt/teragrep/cfe_39/etc/ingress.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ -# Number of consumers created to the consumer groups -numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # Boolean for deciding if records not in RFC5424 should be skipped or not. diff --git a/src/test/resources/largeFile.application.properties b/src/test/resources/largeFile.application.properties index e35c8409..dfadca92 100644 --- a/src/test/resources/largeFile.application.properties +++ b/src/test/resources/largeFile.application.properties @@ -6,8 +6,6 @@ #ingress.configurationFile=/opt/teragrep/cfe_39/etc/ingress.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ -# Number of consumers created to the consumer groups -numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # Boolean for deciding if records not in RFC5424 should be skipped or not. 
diff --git a/src/test/resources/valid.application.properties b/src/test/resources/valid.application.properties index 408fc7cf..1bdbd356 100644 --- a/src/test/resources/valid.application.properties +++ b/src/test/resources/valid.application.properties @@ -6,8 +6,6 @@ #ingress.configurationFile=/opt/teragrep/cfe_39/etc/ingress.properties # What topics are searched from kafka, regex queueTopicPattern=^testConsumerTopic-*$ -# Number of consumers created to the consumer groups -numOfConsumers=2 # Directory where AVRO files are constructed for HDFS queueDirectory=/opt/teragrep/cfe_39/etc/AVRO/ # Boolean for deciding if records not in RFC5424 should be skipped or not. diff --git a/src/test/resources/valid.kafka.properties b/src/test/resources/valid.kafka.properties index b167b3e3..fd123af9 100644 --- a/src/test/resources/valid.kafka.properties +++ b/src/test/resources/valid.kafka.properties @@ -19,4 +19,6 @@ fetch.max.bytes=1073741820 request.timeout.ms=300000 max.poll.interval.ms=300000 # For testing only -useMockKafkaConsumer=true \ No newline at end of file +useMockKafkaConsumer=true +# Number of consumers created to the consumer groups +numOfConsumers=2 \ No newline at end of file From f3f8fdeec22f91a834e5057c1ae1678f6d9b06d4 Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 11 Dec 2024 13:46:17 +0200 Subject: [PATCH 76/77] Replaced offsetToJSON() printer of KafkaRecord interface with topicPartition() and offset() methods. Refactored code to match the change. 
--- .../kafka/BatchDistributionImpl.java | 10 ++++---- .../cfe_39/consumers/kafka/KafkaRecord.java | 5 +++- .../consumers/kafka/KafkaRecordImpl.java | 11 ++++++--- .../consumers/kafka/PartitionFileFactory.java | 8 +++---- .../consumers/kafka/PartitionFileImpl.java | 23 +++++++++++-------- .../consumers/kafka/PartitionRecordsImpl.java | 20 +++++++++++----- 6 files changed, 49 insertions(+), 28 deletions(-) diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java index f5ae5909..ce1781c3 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/BatchDistributionImpl.java @@ -129,20 +129,20 @@ public void accept(List batch) { ListIterator recordOffsetListIterator = batch.listIterator(); while (recordOffsetListIterator.hasNext()) { KafkaRecordImpl next = recordOffsetListIterator.next(); - JsonObject recordOffset = JsonParser.parseString(next.offsetToJSON()).getAsJsonObject(); // If the PartitionFileImpl corresponding to the record's partition doesn't exist, create one. - if (!partitionFileMap.containsKey(recordOffset.get("partition").getAsString())) { + if (!partitionFileMap.containsKey(Integer.toString(next.topicPartition().partition()))) { try { partitionFileMap - .put(recordOffset.get("partition").getAsString(), partitionFileFactory.partitionFor(recordOffset)); + .put(Integer.toString(next.topicPartition().partition()), partitionFileFactory.partitionFor(next.topicPartition())); } catch (IOException e) { - LOGGER.error("Failed to create new PartitionFileImpl for record <{}>", recordOffset); + LOGGER.error("Failed to create new PartitionFileImpl for record <{}>", next.topicPartition()); throw new RuntimeException(e); } } // Every PartitionFileImpl object will hold responsibility over a single unique file that is related to a single topic partition. 
- PartitionFileImpl recordPartitionFile = partitionFileMap.get(recordOffset.get("partition").getAsString()); + PartitionFileImpl recordPartitionFile = partitionFileMap + .get(Integer.toString(next.topicPartition().partition())); // Tell PartitionFileImpl to add the current record to the list of records that are going to be added to the file. recordPartitionFile.addRecord(next); batchBytes = batchBytes + next.size(); // metrics diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java index 355a3e67..31565d24 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecord.java @@ -46,12 +46,15 @@ package com.teragrep.cfe_39.consumers.kafka; import com.teragrep.cfe_39.avro.SyslogRecord; +import org.apache.kafka.common.TopicPartition; public interface KafkaRecord { public abstract long size(); - public abstract String offsetToJSON(); + public abstract TopicPartition topicPartition(); + + public abstract long offset(); public abstract SyslogRecord toSyslogRecord(); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java index e3870447..d313287f 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/KafkaRecordImpl.java @@ -46,6 +46,7 @@ package com.teragrep.cfe_39.consumers.kafka; import com.teragrep.cfe_39.avro.SyslogRecord; +import org.apache.kafka.common.TopicPartition; import java.io.ByteArrayInputStream; import java.io.InputStream; @@ -76,9 +77,13 @@ public long size() { } @Override - public String offsetToJSON() { - return String - .format("{\"topic\":\"%s\", \"partition\":%d, \"offset\":%d}", this.topic, this.partition, this.offset); + public TopicPartition topicPartition() { + return new 
TopicPartition(topic, partition); + } + + @Override + public long offset() { + return this.offset; } @Override diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java index c0ead5b7..4a9ffe92 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileFactory.java @@ -45,10 +45,10 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.google.gson.JsonObject; import com.teragrep.cfe_39.configuration.CommonConfiguration; import com.teragrep.cfe_39.configuration.HdfsConfiguration; import com.teragrep.cfe_39.consumers.kafka.queue.UniqueFileCreated; +import org.apache.kafka.common.TopicPartition; import java.io.IOException; @@ -62,15 +62,15 @@ public final class PartitionFileFactory { this.hdfsConfig = hdfsConfig; } - public PartitionFileImpl partitionFor(JsonObject recordOffset) throws IOException { + public PartitionFileImpl partitionFor(TopicPartition topicPartition) throws IOException { UniqueFileCreated uniqueFileCreated = new UniqueFileCreated( config.queueDirectory(), - recordOffset.get("topic").getAsString() + recordOffset.get("partition").getAsString() + topicPartition.topic() + topicPartition.partition() ); return new PartitionFileImpl( uniqueFileCreated.getNextWritableFile(), hdfsConfig, - recordOffset, + topicPartition, new PartitionRecordsImpl(config) ); } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java index f32790c4..f6f13e32 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionFileImpl.java @@ -45,9 +45,9 @@ */ package com.teragrep.cfe_39.consumers.kafka; -import com.google.gson.JsonObject; import 
com.teragrep.cfe_39.avro.SyslogRecord; import com.teragrep.cfe_39.configuration.HdfsConfiguration; +import org.apache.kafka.common.TopicPartition; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,7 +60,7 @@ public final class PartitionFileImpl implements PartitionFile { private static final Logger LOGGER = LoggerFactory.getLogger(PartitionFileImpl.class); - private final JsonObject topicPartition; + private final TopicPartition topicPartition; private final HdfsConfiguration hdfsConfig; private final File syslogFile; private final List batchOffsets; @@ -69,7 +69,7 @@ public final class PartitionFileImpl implements PartitionFile { PartitionFileImpl( File file, HdfsConfiguration hdfsConfig, - JsonObject topicPartition, + TopicPartition topicPartition, PartitionRecordsImpl partitionRecords ) { this(file, hdfsConfig, topicPartition, new ArrayList<>(), partitionRecords); @@ -78,7 +78,7 @@ public final class PartitionFileImpl implements PartitionFile { PartitionFileImpl( File syslogFile, HdfsConfiguration hdfsConfig, - JsonObject topicPartition, + TopicPartition topicPartition, List batchOffsets, PartitionRecordsImpl partitionRecords ) { @@ -116,7 +116,7 @@ public void commitRecords() throws IOException { LOGGER .debug( "Kafka Batch for topic {} partition {} processed successfully. Final record offset of the batch was {}.", - topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), storedOffset + topicPartition.topic(), topicPartition.partition(), storedOffset ); } batchOffsets.add(storedOffset); @@ -127,7 +127,7 @@ public void commitRecords() throws IOException { LOGGER .debug( "Kafka Batch for topic {} partition {} was empty. Final record offset of the batch was {}. 
Proceeding to write the existing syslogFile to HDFS.", - topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), storedOffset + topicPartition.topic(), topicPartition.partition(), storedOffset ); } writeToHdfsEarly(); @@ -147,7 +147,7 @@ public void delete() { LOGGER .debug( "PartitionFileImpl-object representing topic {} partition {} was notified of consumer group rebalance. Deleting syslogFile allocated to the object at {}", - topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), syslogFile.getPath() + topicPartition.topic(), topicPartition.partition(), syslogFile.getPath() ); } syslogFile.delete(); @@ -156,7 +156,12 @@ public void delete() { // Writes the file to hdfs and initializes new file. private void writeToHdfs(long offset) throws IOException { try ( - HDFSWrite writer = new HDFSWrite(hdfsConfig, topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), offset) + HDFSWrite writer = new HDFSWrite( + hdfsConfig, + topicPartition.topic(), + Integer.toString(topicPartition.partition()), + offset + ) ) { writer.commit(syslogFile); // commits the final AVRO-file to HDFS. } @@ -166,7 +171,7 @@ private void writeToHdfs(long offset) throws IOException { LOGGER .debug( "SyslogFile representing topic {} partition {} stored to HDFS with offset value of {}. 
SyslogFile allocated to the object located at {} has been deleted to prepare for storing new records.", - topicPartition.get("topic").getAsString(), topicPartition.get("partition").getAsString(), offset, syslogFile.getPath() + topicPartition.topic(), topicPartition.partition(), offset, syslogFile.getPath() ); } } diff --git a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java index 9a78d936..21907e26 100644 --- a/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java +++ b/src/main/java/com/teragrep/cfe_39/consumers/kafka/PartitionRecordsImpl.java @@ -82,12 +82,16 @@ public List toSyslogRecordList() { if (config.skipNonRFC5424Records()) { LOGGER .warn( - "Skipping parsing a non RFC5424 record, record metadata: <{}>. Exception information: ", - next.offsetToJSON(), e + "Skipping parsing a non RFC5424 record, record topic partition: <{}> offset:<{}>. Exception information: ", + next.topicPartition(), next.offset(), e ); } else { - LOGGER.error("Failed to parse RFC5424 record <{}>", next.offsetToJSON()); + LOGGER + .error( + "Failed to parse RFC5424 record <{}> offset:<{}>", next.topicPartition(), + next.offset() + ); throw new RuntimeException(e); } } @@ -95,12 +99,16 @@ public List toSyslogRecordList() { if (config.skipEmptyRFC5424Records()) { LOGGER .warn( - "Skipping parsing an empty RFC5424 record, record metadata: <{}>. Exception information: ", - next.offsetToJSON(), e + "Skipping parsing an empty RFC5424 record, record topic partition: <{}> offset:<{}>. 
Exception information: ", + next.topicPartition(), next.offset(), e ); } else { - LOGGER.error("Failed to parse RFC5424 record <{}> because of null content", next.offsetToJSON()); + LOGGER + .error( + "Failed to parse RFC5424 record <{}> offset:<{}> because of null content", + next.topicPartition(), next.offset() + ); throw new RuntimeException(e); } } From 96fa63ed83f632ae91685427eb07453cf95de42b Mon Sep 17 00:00:00 2001 From: Tiihott <48@teragrep.com> Date: Wed, 11 Dec 2024 14:10:30 +0200 Subject: [PATCH 77/77] Fixed tests to use target directory for avro file generation. --- .../java/com/teragrep/cfe_39/BatchDistributionTest.java | 6 +++++- src/test/java/com/teragrep/cfe_39/HdfsTest.java | 6 +++++- .../com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java | 7 +++++-- src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java | 6 +++++- .../com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java | 6 +++++- .../java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java | 6 +++++- .../java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java | 6 +++++- .../com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java | 6 +++++- src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java | 6 +++++- .../java/com/teragrep/cfe_39/ProcessingFailureTest.java | 6 +++++- .../java/com/teragrep/cfe_39/SyslogAvroWriterTest.java | 6 +++++- 11 files changed, 55 insertions(+), 12 deletions(-) diff --git a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java index b8e95aca..c13c17f2 100644 --- a/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java +++ b/src/test/java/com/teragrep/cfe_39/BatchDistributionTest.java @@ -93,11 +93,15 @@ public class BatchDistributionTest { @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", 
"/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); diff --git a/src/test/java/com/teragrep/cfe_39/HdfsTest.java b/src/test/java/com/teragrep/cfe_39/HdfsTest.java index 1fb25ea2..b84f8361 100644 --- a/src/test/java/com/teragrep/cfe_39/HdfsTest.java +++ b/src/test/java/com/teragrep/cfe_39/HdfsTest.java @@ -81,11 +81,15 @@ public class HdfsTest { @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java index 31ea8183..95e49247 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesLowSizeTest.java @@ -83,11 +83,15 @@ public class Ingestion0FilesLowSizeTest { @BeforeEach public void 
startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); @@ -236,7 +240,6 @@ public void ingestion0FilesLowSizeTest() { Assertions.assertFalse(reader.hasNext()); reader.close(); avroFile.delete(); - ; } }); } diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java index cf5fe5f8..ac254eef 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion0FilesTest.java @@ -81,11 +81,15 @@ public class Ingestion0FilesTest { @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "true"); 
map.put("skipEmptyRFC5424Records", "true"); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java index 88e3d38b..4c5e616f 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion1Old1NewFileTest.java @@ -84,11 +84,15 @@ public class Ingestion1Old1NewFileTest { @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java index c61fa313..f0a3302d 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2NewFilesTest.java @@ -82,11 +82,15 @@ public class Ingestion2NewFilesTest { @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", 
"/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); diff --git a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java index 288037e0..73b68fd0 100644 --- a/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java +++ b/src/test/java/com/teragrep/cfe_39/Ingestion2OldFilesTest.java @@ -84,11 +84,15 @@ public class Ingestion2OldFilesTest { @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); diff --git a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java index 1dfd57ce..a72e7ca1 100644 --- a/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java +++ b/src/test/java/com/teragrep/cfe_39/IngestionConsumerTimeoutTest.java @@ -83,11 +83,15 @@ public class IngestionConsumerTimeoutTest { @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new 
File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); diff --git a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java index f69eeed0..fb257e69 100644 --- a/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java +++ b/src/test/java/com/teragrep/cfe_39/KafkaConsumerTest.java @@ -85,11 +85,15 @@ public class KafkaConsumerTest { @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true"); diff --git a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java index 3cffd0b4..0c0bd10e 100644 --- 
a/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java +++ b/src/test/java/com/teragrep/cfe_39/ProcessingFailureTest.java @@ -89,11 +89,15 @@ public class ProcessingFailureTest { @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", "^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "false"); map.put("skipEmptyRFC5424Records", "false"); diff --git a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java index 1de10743..43e2a621 100644 --- a/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java +++ b/src/test/java/com/teragrep/cfe_39/SyslogAvroWriterTest.java @@ -73,11 +73,15 @@ public class SyslogAvroWriterTest { @BeforeEach public void startMiniCluster() { assertDoesNotThrow(() -> { + File queueDir = new File(System.getProperty("user.dir") + "/target/AVRO"); + if (!queueDir.exists()) { + queueDir.mkdirs(); + } Map map = new HashMap<>(); map.put("log4j2.configurationFile", "/opt/teragrep/cfe_39/etc/log4j2.properties"); map.put("egress.configurationFile", "/opt/teragrep/cfe_39/etc/egress.properties"); map.put("ingress.configurationFile", "/opt/teragrep/cfe_39/etc/ingress.properties"); - map.put("queueDirectory", System.getProperty("user.dir") + "/etc/AVRO/"); + map.put("queueDirectory", System.getProperty("user.dir") + "/target/AVRO/"); map.put("queueTopicPattern", 
"^testConsumerTopic-*$"); map.put("skipNonRFC5424Records", "true"); map.put("skipEmptyRFC5424Records", "true");