tfeldmann · dpomykala · Nov 10, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## [Unreleased]
 
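+- Added support for arbitrary plain-text files in the `filecontent` filter: any file that is not a `.pdf` or `.docx` is now read as plain text, instead of only `.md`, `.txt` and `.log` files (#464).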
 - Fixed #438 (`filecontent` filter fails for PDFs when `pdftotext` isn't installed, instead of falling back to `pdfminer`)
 
 ## v3.3.0 (2024-11-25)

diff --git a/organize/filters/filecontent.py b/organize/filters/filecontent.py
@@ -79,26 +79,21 @@ def extract_docx(path: Path) -> str:
     return clean(result)
 
 
-EXTRACTORS: Dict[str, Callable[[Path], str]] = {
-    ".md": extract_txt,
-    ".txt": extract_txt,
-    ".log": extract_txt,
+SPECIALIZED_EXTRACTORS: Dict[str, Callable[[Path], str]] = {
     ".pdf": extract_pdf,
     ".docx": extract_docx,
 }
 
 
 def textract(path: Path) -> str:
-    extractor = EXTRACTORS[path.suffix.lower()]
+    extractor = SPECIALIZED_EXTRACTORS.get(path.suffix.lower(), extract_txt)
     return extractor(path)
 
 
 @dataclass(config=ConfigDict(coerce_numbers_to_str=True, extra="forbid"))
 class FileContent:
     """Matches file content with the given regular expression.
 
-    Supports .md, .txt, .log, .pdf and .docx files.
-
     For PDF content extraction poppler should be installed for the `pdftotext` command.
     If this is not available `filecontent` will fall back to the `pdfminer` library.
 

diff --git a/tests/filters/test_filecontent.py b/tests/filters/test_filecontent.py
@@ -1,14 +1,23 @@
 from conftest import make_files, read_files
 
 from organize import Config
+from organize.filters import filecontent
 
 
-def test_filecontent(fs):
+def test_filecontent(fs, monkeypatch):
+    # Mock extractor functions for PDF and DOCX - return fixed values
+    monkeypatch.setitem(filecontent.SPECIALIZED_EXTRACTORS, ".pdf", lambda _: "PDF")
+    monkeypatch.setitem(filecontent.SPECIALIZED_EXTRACTORS, ".docx", lambda _: "DOCX")
+
     # inspired by https://github.com/tfeldmann/organize/issues/43
     files = {
-        "Test1.txt": "Lorem MegaCorp Ltd. ipsum\nInvoice 12345\nMore text\nID: 98765",
+        "Test1": "Lorem MegaCorp Ltd. ipsum\nInvoice 12345\nMore text\nID: 98765",
         "Test2.txt": "Tests",
         "Test3.txt": "My Homework ...",
+        "test4.xml": "XML",
+        # Content is not important as we mock extractors
+        "test5.pdf": "",
+        "test6.docx": "",
     }
     make_files(files, "test")
     Config.from_string(
@@ -24,10 +33,18 @@ def test_filecontent(fs):
             - filecontent: '.*Homework.*'
           actions:
             - rename: "Homework.txt"
+        - locations: "/test"
+          filters:
+            - filecontent: '(?P<all>XML|PDF|DOCX)'
+          actions:
+            - rename: '{filecontent.all}'
         """
     ).execute(simulate=False)
     assert read_files("test") == {
         "Homework.txt": "My Homework ...",
         "MegaCorp_Invoice_12345.txt": "Lorem MegaCorp Ltd. ipsum\nInvoice 12345\nMore text\nID: 98765",
         "Test2.txt": "Tests",
+        "XML": "XML",
+        "PDF": "",
+        "DOCX": "",
     }