diff --git a/README.md b/README.md index db395ce..9dd4f7e 100644 --- a/README.md +++ b/README.md @@ -484,6 +484,21 @@ jupyter notebook examples/an_awesome_example.ipynb Grouped Dataset Tips & Tricks Learn how to work with grouped datasets in FiftyOne + + + + + + + + + + + + + Dynamic Group Tips & Tricks + Learn how to work with dynamic grouping in FiftyOne + ## Contributing diff --git a/examples/Dynamic Group.ipynb b/examples/Dynamic Group.ipynb new file mode 100644 index 0000000..469c169 --- /dev/null +++ b/examples/Dynamic Group.ipynb @@ -0,0 +1,581 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " Try in Google Colab\n", + " \n", + " \n", + " \n", + " \n", + " Share via nbviewer\n", + " \n", + " \n", + " \n", + " \n", + " View on GitHub\n", + " \n", + " \n", + " \n", + " \n", + " Download notebook\n", + " \n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "id": "5eed0e23", + "metadata": {}, + "source": [ + "# **FiftyOne Dynamic Group**" + ] + }, + { + "cell_type": "markdown", + "id": "47ee0b18", + "metadata": {}, + "source": [ + "## Creating a Dynamic Group View with `group_by`" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c976910b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Split 'test' already downloaded\n", + "Loading 'cifar10' split 'test'\n", + " 100% |█████████████| 10000/10000 [2.9s elapsed, 0s remaining, 3.5K samples/s] \n", + "Dataset 'cifar10-test' created\n", + "group\n", + "10\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import fiftyone as fo\n", + "import fiftyone.zoo as foz\n", + "from fiftyone import ViewField as F\n", + "\n", + "dataset = foz.load_zoo_dataset(\"cifar10\", split=\"test\")\n", + "\n", + "# Take 100 samples and group by ground truth label\n", + "view = dataset.take(100, seed=51).group_by(\"ground_truth.label\")\n", + "\n", + "print(view.media_type) # group\n", + "print(len(view)) # 10\n", + "session = fo.launch_app(view)" + ] + }, + { + "cell_type": "markdown", + "id": "f96e7761", + "metadata": {}, + "source": [ + "## Grouping and ordering frames by their original sample_id" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "23453f14", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset already downloaded\n", + "Loading existing dataset 'quickstart-video'. 
To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use\n", + "Name: 2023.09.08.10.04.14\n", + "Media type: image\n", + "Num samples: 1279\n", + "Persistent: False\n", + "Tags: []\n", + "Sample fields:\n", + " id: fiftyone.core.fields.ObjectIdField\n", + " filepath: fiftyone.core.fields.StringField\n", + " tags: fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)\n", + " metadata: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)\n", + " sample_id: fiftyone.core.fields.ObjectIdField\n", + " frame_number: fiftyone.core.fields.FrameNumberField\n", + " detections: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "dataset2 = (\n", + " foz.load_zoo_dataset(\"quickstart-video\")\n", + " .to_frames(sample_frames=True)\n", + " .clone()\n", + ")\n", + "print(dataset2) #1279 samples\n", + "session.view = dataset2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1afee4cb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n", + "[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]\n" + ] + } + ], + "source": [ + "view2 = dataset2.group_by(\"sample_id\", order_by=\"frame_number\")\n", + "\n", + "print(len(view2)) # 10\n", + "print(view2.values(\"frame_number\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d65eb4b2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "session.view = view2" + ] + }, + { + "cell_type": "markdown", + "id": "0b012b32", + "metadata": {}, + "source": [ + "## Working with dynamic group views" + ] + }, + { + "cell_type": "markdown", + "id": "24529a67", + "metadata": {}, + "source": [ + "### `get_dynamic_group()`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "edcb2b12", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119]\n" + ] + } + ], + "source": [ + "sample_id = dataset2.take(1).first().sample_id\n", + "video = view2.get_dynamic_group(sample_id)\n", + "\n", + "print(video.values(\"frame_number\"))" + ] + }, + { + "cell_type": "markdown", + "id": "195852f3", + "metadata": {}, + "source": [ + "### `group_by` with condition" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8d16b4fb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset already downloaded\n", + "Loading existing dataset 'quickstart'. 
To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use\n", + "26\n", + "26\n" + ] + } + ], + "source": [ + "dataset3 = foz.load_zoo_dataset(\"quickstart\")\n", + "\n", + "# Group samples by the number of ground truth objects they contain\n", + "expr = F(\"ground_truth.detections\").length()\n", + "view3 = dataset3.group_by(expr)\n", + "\n", + "print(len(view3)) # 26\n", + "print(len(dataset3.distinct(expr))) # 26" + ] + }, + { + "cell_type": "markdown", + "id": "1176627d", + "metadata": {}, + "source": [ + "### Iterating through dynamic groups" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8c54c681", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "airplane\n", + "automobile\n", + "bird\n", + "cat\n", + "deer\n", + "dog\n", + "frog\n", + "horse\n", + "ship\n", + "truck\n" + ] + } + ], + "source": [ + "# Sort the groups by label\n", + "sorted_view = view.sort_by(\"ground_truth.label\")\n", + "\n", + "for sample in sorted_view:\n", + " print(sample.ground_truth.label)" + ] + }, + { + "cell_type": "markdown", + "id": "84d474be", + "metadata": {}, + "source": [ + "### `flatten()`" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "fde6bfff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "100\n", + "['airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'automobile', 'automobile', 'automobile', 'automobile', 'automobile', 'automobile', 'automobile', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'bird', 'cat', 'cat', 'cat', 'cat', 'cat', 'deer', 'deer', 'deer', 'deer', 'deer', 'deer', 'deer', 'deer', 'deer', 'deer', 'deer', 'deer', 'deer', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'frog', 'frog', 'frog', 'frog', 'frog', 'frog', 
'frog', 'frog', 'frog', 'frog', 'horse', 'horse', 'horse', 'horse', 'horse', 'horse', 'horse', 'horse', 'horse', 'horse', 'ship', 'ship', 'ship', 'ship', 'ship', 'ship', 'ship', 'ship', 'ship', 'ship', 'truck', 'truck', 'truck', 'truck', 'truck', 'truck', 'truck', 'truck', 'truck', 'truck', 'truck']\n" + ] + } + ], + "source": [ + "# Unwind the sorted groups back into a flat collection\n", + "flat_sorted_view = sorted_view.flatten()\n", + "\n", + "print(len(flat_sorted_view)) # 100\n", + "print(flat_sorted_view.values(\"ground_truth.label\"))\n", + "# ['airplane', 'airplane', 'airplane', ..., 'truck']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "341751a4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n" + ] + } + ], + "source": [ + "group = view.get_dynamic_group(\"horse\")\n", + "print(len(group)) # 10" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "de952812", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "airplane: 14\n", + "automobile: 7\n", + "bird: 9\n", + "cat: 5\n", + "deer: 13\n", + "dog: 11\n", + "frog: 10\n", + "horse: 10\n", + "ship: 10\n", + "truck: 11\n" + ] + } + ], + "source": [ + "for group in sorted_view.iter_dynamic_groups():\n", + " print(\"%s: %d\" % (group.first().ground_truth.label, len(group)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0c64692", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}