@@ -392,4 +392,100 @@ void testMixedEntryFormats() {
392392 Assertions .assertEquals (DateHelper .parse ("20200103" ), helper .getEarliestOccurrenceOfFieldWithType ("NAME" , "maze" , accumuloClient , null ));
393393 }
394394 }
395+
396+ /**
397+ * Tests for {@link MetadataHelper#getMissingFieldsInDateRange(Set, Set, String, String, Set)}.
398+ */
399+ @ Nested
400+ public class GetMissingFieldsInDateRangeTest {
401+ /**
402+ * Test against a table that has only non-aggregated entries as matches.
403+ */
404+ @ Test
405+ void testNonAggregatedEntriesOnly () throws TableNotFoundException {
406+ givenNonAggregatedFrequencyRows ("NAME" , COLF_F , "csv" , "20200103" , "20200120" , 1L );
407+ givenNonAggregatedFrequencyRows ("NAME" , COLF_F , "wiki" , "20200101" , "20200120" , 2L );
408+ givenNonAggregatedFrequencyRows ("NAME" , COLF_F , "maze" , "20200105" , "20200120" , 3L );
409+ givenNonAggregatedFrequencyRows ("NAME" , COLF_F , "data" , "20200107" , "20200102" , 3L );
410+ givenNonAggregatedFrequencyRows ("EVENT_DATE" , COLF_F , "csv" , "20200101" , "20200120" , 4L );
411+ givenNonAggregatedFrequencyRows ("EVENT_DATE" , COLF_F , "wiki" , "20200101" , "20200120" , 5L );
412+ givenNonAggregatedFrequencyRows ("EVENT_DATE" , COLF_F , "maze" , "20200101" , "20200120" , 6L );
413+ writeMutations ();
414+
415+ // No DataTypes
416+ Assertions .assertEquals (Collections .emptySet (), helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" ), Collections .emptySet (), "20200101" ,
417+ "20200120" , Collections .emptySet ()));
418+ // Using DataTypes
419+ Assertions .assertEquals (Set .of ("EVENT_DATE" ),
420+ helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" ), Set .of ("data" ), "20200101" , "20200120" , Collections .emptySet ()));
421+ // Fictitious field
422+ Assertions .assertEquals (Set .of ("FOO" ), helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" , "FOO" ),
423+ Set .of ("wiki" , "data" , "csv" , "maze" ), "20200101" , "20200120" , Collections .emptySet ()));
424+ // Missing because of date range
425+ Assertions .assertEquals (Set .of ("NAME" , "EVENT_DATE" ), helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" ), Set .of ("wiki" , "data" ),
426+ "20190101" , "20191231" , Collections .emptySet ()));
427+ }
428+
429+ /**
430+ * Test against a table that has only aggregated entries as matches.
431+ */
432+ @ Test
433+ void testAggregatedEntriesOnly () throws TableNotFoundException {
434+ givenAggregatedFrequencyRow ("NAME" , COLF_F , "csv" , createDateFrequencyMap ("20200113" , 1L , "20200115" , 5L , "20200116" , 3L ));
435+ givenAggregatedFrequencyRow ("NAME" , COLF_F , "wiki" , createDateFrequencyMap ("20200111" , 1L , "20200112" , 15L , "20200113" , 3L ));
436+ givenAggregatedFrequencyRow ("NAME" , COLF_F , "maze" , createDateFrequencyMap ("20200102" , 1L , "20200104" , 55L , "20200105" , 3L ));
437+ givenAggregatedFrequencyRow ("NAME" , COLF_F , "data" , createDateFrequencyMap ("20200101" , 1L , "20200103" , 3L ));
438+ givenAggregatedFrequencyRow ("EVENT_DATE" , COLF_F , "csv" , createDateFrequencyMap ("20200101" , 2L , "20200102" , 3L , "20200103" , 4L ));
439+ givenAggregatedFrequencyRow ("EVENT_DATE" , COLF_F , "wiki" , createDateFrequencyMap ("20200101" , 2L , "20200102" , 3L , "20200103" , 4L ));
440+ givenAggregatedFrequencyRow ("EVENT_DATE" , COLF_F , "maze" , createDateFrequencyMap ("20200101" , 2L , "20200102" , 3L , "20200103" , 4L ));
441+ writeMutations ();
442+
443+ // No DataTypes
444+ Assertions .assertEquals (Collections .emptySet (), helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" ), Collections .emptySet (), "20200101" ,
445+ "20200120" , Collections .emptySet ()));
446+ // Using DataTypes
447+ Assertions .assertEquals (Set .of ("EVENT_DATE" ),
448+ helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" ), Set .of ("data" ), "20200101" , "20200120" , Collections .emptySet ()));
449+ // Fictitious field
450+ Assertions .assertEquals (Set .of ("FOO" ), helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" , "FOO" ),
451+ Set .of ("wiki" , "data" , "csv" , "maze" ), "20200101" , "20200120" , Collections .emptySet ()));
452+ // Missing because of date range
453+ Assertions .assertEquals (Set .of ("NAME" , "EVENT_DATE" ), helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" ), Set .of ("wiki" , "data" ),
454+ "20190101" , "20191231" , Collections .emptySet ()));
455+ }
456+
457+ /**
458+ * Test against a table that has both aggregated and non-aggregated entries as matches.
459+ */
460+ @ Test
461+ void testMixedEntryFormats () throws TableNotFoundException {
462+ givenAggregatedFrequencyRow ("NAME" , COLF_F , "csv" , createDateFrequencyMap ("20200111" , 1L , "20200112" , 5L , "20200113" , 3L ));
463+ givenNonAggregatedFrequencyRows ("NAME" , COLF_F , "csv" , "20200111" , "20200120" , 1L );
464+ givenAggregatedFrequencyRow ("NAME" , COLF_F , "wiki" , createDateFrequencyMap ("20200111" , 1L , "20200112" , 15L , "20200113" , 3L ));
465+ givenAggregatedFrequencyRow ("NAME" , COLF_F , "maze" , createDateFrequencyMap ("20200111" , 1L , "20200112" , 55L , "20200113" , 3L ));
466+ givenNonAggregatedFrequencyRows ("NAME" , COLF_F , "maze" , "20200103" , "20200120" , 3L );
467+ givenAggregatedFrequencyRow ("NAME" , COLF_F , "data" , createDateFrequencyMap ("20200111" , 1L , "20200113" , 3L ));
468+ givenNonAggregatedFrequencyRows ("NAME" , COLF_F , "data" , "20200101" , "20200115" , 3L );
469+ givenAggregatedFrequencyRow ("EVENT_DATE" , COLF_F , "csv" , createDateFrequencyMap ("20200101" , 2L , "20200102" , 3L , "20200103" , 4L ));
470+ givenAggregatedFrequencyRow ("EVENT_DATE" , COLF_F , "wiki" , createDateFrequencyMap ("20200101" , 2L , "20200102" , 3L , "20200103" , 4L ));
471+ givenAggregatedFrequencyRow ("EVENT_DATE" , COLF_F , "maze" , createDateFrequencyMap ("20200101" , 2L , "20200102" , 3L , "20200103" , 4L ));
472+ givenNonAggregatedFrequencyRows ("EVENT_DATE" , COLF_F , "csv" , "20200101" , "20200120" , 4L );
473+ givenNonAggregatedFrequencyRows ("EVENT_DATE" , COLF_F , "wiki" , "20200101" , "20200120" , 5L );
474+ givenNonAggregatedFrequencyRows ("EVENT_DATE" , COLF_F , "maze" , "20200101" , "20200120" , 6L );
475+ writeMutations ();
476+
477+ // No DataTypes
478+ Assertions .assertEquals (Collections .emptySet (), helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" ), Collections .emptySet (), "20200101" ,
479+ "20200120" , Collections .emptySet ()));
480+ // Using DataTypes
481+ Assertions .assertEquals (Set .of ("EVENT_DATE" ),
482+ helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" ), Set .of ("data" ), "20200101" , "20200120" , Collections .emptySet ()));
483+ // Fictitious field
484+ Assertions .assertEquals (Set .of ("FOO" ), helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" , "FOO" ),
485+ Set .of ("wiki" , "data" , "csv" , "maze" ), "20200101" , "20200120" , Collections .emptySet ()));
486+ // Missing because of date range
487+ Assertions .assertEquals (Set .of ("NAME" , "EVENT_DATE" ), helper .getMissingFieldsInDateRange (Set .of ("NAME" , "EVENT_DATE" ), Set .of ("wiki" , "data" ),
488+ "20190101" , "20191231" , Collections .emptySet ()));
489+ }
490+ }
395491}
0 commit comments