Skip to content

Commit e186603

Browse files
Airtable API integration (#22)
Adds an Airtable datasource. Adapts the existing RestDatasource to Airtable's API conventions and adds some conveniences for reaching a base's API via its id and table name.
1 parent e1a6cd1 commit e186603

File tree

6 files changed

+289
-2
lines changed

6 files changed

+289
-2
lines changed

.github/workflows/pull-request.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,6 @@ jobs:
3333
key: do-app-baseimage-django-node:364385f9d196a2bbe2d5faea025520cc0316501f-poetry-${{ hashFiles('poetry.lock') }}
3434
- run: make install
3535
- run: make ci
36+
env:
37+
EXAMPLE_AIRTABLE_BASE: ${{ secrets.EXAMPLE_AIRTABLE_BASE }}
38+
EXAMPLE_AIRTABLE_API_KEY: ${{ secrets.EXAMPLE_AIRTABLE_API_KEY }}
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
from typing import Any, Dict, Iterable, Optional, TypeVar
2+
3+
import dataclasses
4+
5+
from django.conf import settings
6+
from rest_framework_dataclasses.field_utils import get_type_info
7+
8+
from groundwork.core.datasources import RestDatasource
9+
10+
ResourceT = TypeVar("ResourceT")
11+
12+
13+
def airtable_field(name: str, **kwargs: Any) -> dataclasses.Field:
    """
    Return a [dataclass field](https://docs.python.org/3/library/dataclasses.html#dataclasses.Field) used to annotate
    a Resource class with the name of the column in Airtable.

    For example, if you have an Airtable like this:

    | First Name  | Last Name  |
    | ----------- | ---------- |
    | Stafford    | Beer       |
    | Clara       | Zetkin     |

    You could map it onto a resource dataclass like this:

    ```python
    @dataclass
    class People:
        id: str
        first_name: str = airtable_field('First Name')
        last_name: str = airtable_field('Last Name')
    ```

    If you do not annotate your field like this, `AirtableDatasource` will expect your column in Airtable to have the
    same name as the field on your Resource class.

    Args:
        name: Airtable column name associated with this field.
        kwargs: Keyword args passed to [dataclasses.field](https://docs.python.org/3/library/dataclasses.html#dataclasses.field).
            A `metadata` mapping, if supplied, is merged with the column mapping rather than replacing it.

    Returns:
        A dataclass field descriptor identifying the corresponding Airtable column.
    """
    # The column mapping is keyed by this module's name so it stays apart from
    # metadata belonging to other libraries; caller-supplied metadata is merged on top.
    metadata = {__name__: {"airtable_field": name}}
    metadata.update(kwargs.pop("metadata", None) or {})

    return dataclasses.field(metadata=metadata, **kwargs)
50+
51+
52+
class AirtableDatasource(RestDatasource[ResourceT]):
    """
    Base class for implementing clients to Airtable bases and converting their responses to resource objects.

    You are encouraged to use Python's inbuilt [`@dataclass`](https://docs.python.org/3/library/dataclasses.html)
    decorator and define type hints when defining these classes as this allows type-safe serializers to be
    auto-generated and decreases the amount of boilerplate code that you need to write.

    __Example:__

    Let's assume we have a public airtable with the base id `4rQYK6P56My`. It contains a table called 'Active Members',
    which looks like this:

    | First Name  | Last Name  |
    | ----------- | ---------- |
    | Stafford    | Beer       |
    | Clara       | Zetkin     |


    We can create a datasource for it as follows:

    ```python
    from dataclasses import dataclass
    from groundwork.contrib.airtable.datasources import AirtableDatasource, airtable_field

    @dataclass
    class Person:
        id: str
        first_name: str = airtable_field('First Name')
        last_name: str = airtable_field('Last Name')

    my_datasource = AirtableDatasource(
        base_id="4rQYK6P56My",
        table_name="Active Members",
        resource_type=Person,
    )
    ```

    As with other datasource types, configuration can either be provided as keyword-args to the constructor, or
    overridden in subclasses.
    """

    base_url = "https://api.airtable.com/v0"

    api_key: Optional[str]
    """
    Airtable API key. Required for private Airtable bases. If not defined, will default to the value of
    `django.conf.settings.AIRTABLE_API_KEY`.
    """

    base_id: Optional[str] = None
    """
    ID of the airtable base. You can find this in your base's [API Docs](https://airtable.com/api)
    """

    table_name: Optional[str] = None
    """
    Name of the table to fetch from.
    """

    def __init__(self, resource_type: ResourceT, base=None, table=None, **kwargs):
        """
        Configure the datasource and derive its request path and API key.

        Args:
            resource_type: Dataclass that Airtable records are deserialized into.
            base: Shorthand for `base_id`. Only used when `base_id` is not otherwise configured.
            table: Shorthand for `table_name`. Only used when `table_name` is not otherwise configured.
            kwargs: Remaining configuration, forwarded to the parent constructor.
        """
        super().__init__(resource_type=resource_type, **kwargs)

        # Treat `base`/`table` as fallbacks so explicit `base_id`/`table_name`
        # configuration (keyword or subclass attribute) always wins.
        if base is not None and not self.base_id:
            self.base_id = base
        if table is not None and not self.table_name:
            self.table_name = table

        # An explicitly configured `path` takes precedence over base/table.
        if not getattr(self, "path", None):
            assert self.base_id, "AirtableDatasource requires `base_id` unless `path` is set"
            assert self.table_name, "AirtableDatasource requires `table_name` unless `path` is set"
            self.path = f"/{self.base_id}/{self.table_name}"

        # `api_key` is only annotated on the class, so the attribute is absent
        # unless it was assigned during construction or by a subclass.
        if not hasattr(self, "api_key"):
            self.api_key = getattr(settings, "AIRTABLE_API_KEY", None)

    def paginate(self, **query: Any) -> Iterable[ResourceT]:
        """
        Yield every entry of the `records` list from each page of results.

        Each response may carry an `offset` cursor; it is sent back as the `offset` query parameter to request the
        next page, and iteration stops once a response has no `offset`.

        Args:
            query: Query parameters sent with every page request.

        Returns:
            An iterable over the entries of each page's `records` list.
        """
        offset = None

        while True:
            if offset is not None:
                query["offset"] = offset
            data = self.fetch_url(self.url, query)

            yield from data["records"]

            offset = data.get("offset")
            if offset is None:
                return

    def deserialize(self, data: Dict[str, Any]) -> ResourceT:
        """
        Flatten an Airtable record into resource field names, then delegate to the parent deserializer.

        Args:
            data: A record object with an `id` key and a `fields` mapping of column name to value.

        Returns:
            The result of the parent class's `deserialize` for the flattened record.
        """
        field_data = data["fields"]

        mapped_data = {
            field.name: self._get_mapped_field_value(field, field_data)
            for field in dataclasses.fields(self.resource_type)
        }
        # The record id lives beside `fields`, not inside it, so set it last.
        mapped_data["id"] = data["id"]

        return super().deserialize(mapped_data)

    def get_headers(self) -> Dict[str, str]:
        """
        Build the request headers, adding a bearer `Authorization` header when an API key is configured.
        """
        headers = {}

        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        return headers

    def _get_mapped_field_name(self, field: dataclasses.Field) -> str:
        """
        Look up the mapped field name expected from the Airtable response.

        Args:
            field: Dataclass field descriptor for the resource field

        Returns:
            Airtable column name defined in the field's metadata. Returns the field name if none found.
        """

        if __name__ not in field.metadata:
            return field.name

        return field.metadata[__name__]["airtable_field"]

    def _get_mapped_field_value(
        self, field: dataclasses.Field, data: Dict[str, Any]
    ) -> Any:
        """
        Handle the fact that Airtable omits fields for 'falsy' values. Use the field metadata to determine if we have
        a type supporting a 'falsy' value and return it if missing from the airtable response.

        Args:
            field: Dataclass field descriptor for the resource field.
            data: The raw json object containing field values returned by Airtable.

        Returns:
            The value in `data` identified by `field`, with the appropriate 'falsy' value substituted for missing values
            if relevant to the field type.
        """

        mapped_name = self._get_mapped_field_name(field)
        if mapped_name in data:
            return data[mapped_name]

        type_info = get_type_info(field.type)

        if type_info.base_type is bool:
            return False

        if type_info.base_type is str:
            return ""

        if type_info.is_mapping:
            return {}

        if type_info.is_many:
            return []

        return None

groundwork/core/datasources.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
cast,
1212
)
1313

14+
import dataclasses
1415
import uuid
1516
from abc import ABCMeta, abstractmethod
1617
from dataclasses import dataclass
@@ -20,6 +21,7 @@
2021
import requests
2122
from django.db import models
2223
from rest_framework import parsers, serializers
24+
from rest_framework_dataclasses.field_utils import get_type_info
2325
from rest_framework_dataclasses.serializers import DataclassSerializer
2426

2527
from groundwork.core.cron import register_cron
@@ -149,7 +151,6 @@ class RestDatasource(Datasource[ResourceT]):
149151
def __init__(self, **kwargs: Dict[str, Any]) -> None:
150152
super().__init__(**kwargs)
151153

152-
self.url = f"{self.base_url}{self.path}"
153154
self.parser = self.parser_class()
154155

155156
assert self.resource_type is not None
@@ -158,9 +159,32 @@ def __init__(self, **kwargs: Dict[str, Any]) -> None:
158159
self.serializer_class = type(
159160
f"{self.resource_type.__name__}Serializer",
160161
(DataclassSerializer,),
161-
{"Meta": type("Meta", (), {"dataclass": self.resource_type})},
162+
{
163+
"Meta": type(
164+
"Meta",
165+
(),
166+
{
167+
"dataclass": self.resource_type,
168+
"extra_kwargs": {
169+
field.name: self.get_serializer_field_kwargs(field)
170+
for field in dataclasses.fields(self.resource_type)
171+
},
172+
},
173+
)
174+
},
162175
)
163176

177+
def get_serializer_field_kwargs(self, field: dataclasses.Field):
    """
    Choose the extra serializer kwargs for a single resource dataclass field.

    Args:
        field: Dataclass field descriptor for the resource field.

    Returns:
        `{"allow_blank": True}` for string fields, `{"allow_empty": True}` for mapping or many-valued fields,
        and an empty dict for everything else.
    """
    info = get_type_info(field.type)

    if info.base_type == str:
        extra = {"allow_blank": True}
    elif info.is_mapping or info.is_many:
        extra = {"allow_empty": True}
    else:
        extra = {}

    return extra
187+
164188
def get(self, id: str, **kwargs: Dict[str, Any]) -> ResourceT:
165189
"""
166190
Get a resource by id, deserialize to the resource_type and return.
@@ -285,6 +309,10 @@ def paginate(self, **query: Dict[str, Any]) -> Iterable[ResourceT]:
285309

286310
yield from self.fetch_url(self.url, query)
287311

312+
@property
def url(self) -> str:
    """Full endpoint URL: `base_url` immediately followed by `path`."""
    return "{}{}".format(self.base_url, self.path)
315+
288316

289317
@dataclass
290318
class SyncConfig:

mkdocs.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ nav:
1919
- UK Geographical Data:
2020
- Postcode Geolocation: api/groundwork.geo.territories.uk.postcodes.md
2121
- Parliament API: api/groundwork.geo.territories.uk.parliament.md
22+
- Integrations:
23+
- Airtable:
24+
- Data Sources: api/groundwork.contrib.airtable.datasources.md
2225
- Contributing:
2326
- Contribution Guidelines: contributing.md
2427
- Developer Setup: developing.md

settings.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
INSTALLED_APPS = [
2222
"groundwork.core",
2323
"groundwork.geo",
24+
"groundwork.contrib.airtable",
2425
"test",
2526
"example",
2627
"django_vite",
@@ -194,6 +195,12 @@
194195
"127.0.0.1",
195196
]
196197

198+
# Test settings
199+
200+
# Example Airtable base id and API key, read from the environment; each is None when unset.
EXAMPLE_AIRTABLE_BASE = os.environ.get("EXAMPLE_AIRTABLE_BASE")
EXAMPLE_AIRTABLE_API_KEY = os.environ.get("EXAMPLE_AIRTABLE_API_KEY")
202+
203+
197204
try:
198205
from local import *
199206
except ImportError:
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import os
2+
from dataclasses import dataclass
3+
from test.tags import integration_test
4+
5+
from django.conf import settings
6+
from django.test import TestCase
7+
8+
from groundwork.contrib.airtable import datasources
9+
10+
11+
@integration_test
class AirtableApiTests(TestCase):
    """Integration tests running `AirtableDatasource` against the example base configured in settings."""

    def setUp(self):
        self.datasource = datasources.AirtableDatasource(
            resource_type=MyResource,
            api_key=settings.EXAMPLE_AIRTABLE_API_KEY,
            base_id=settings.EXAMPLE_AIRTABLE_BASE,
            table_name="Table 1",
        )

    def test_can_paginate_list(self):
        self.assertListReturnsAtLeastCount(self.datasource, 120)

    def test_can_get(self):
        self.assertCanGetResourceReturnedFromList(self.datasource)

    def assertListReturnsAtLeastCount(self, resource_type, expected):
        """Assert that listing the datasource yields `expected` or more resources."""
        results = list(resource_type.list())
        # "At least" is inclusive: exactly `expected` results must pass.
        self.assertGreaterEqual(len(results), expected)

    def assertCanGetResourceReturnedFromList(self, resource_type):
        """Assert that the first listed resource can be fetched again by its id."""
        # iter() keeps this working whether list() returns an iterator or a plain iterable.
        resource = next(iter(resource_type.list()))
        resource_id = resource_type.get_id(resource)

        fetched = resource_type.get(resource_id)

        self.assertEqual(resource_type.get_id(fetched), resource_id)
34+
35+
36+
# Resource shape used by AirtableApiTests: the record id plus two mapped columns.
@dataclass
class MyResource:
    # Record identifier.
    id: str

    # Populated from the "Name" and "Notes" Airtable columns respectively.
    name: str = datasources.airtable_field("Name")
    notes: str = datasources.airtable_field("Notes")

0 commit comments

Comments
 (0)