Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Type annotations for utils #2917

Merged
merged 12 commits into from
Sep 30, 2023
7 changes: 5 additions & 2 deletions bookwyrm/activitypub/base_activity.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" basics for an activitypub serializer """
from __future__ import annotations
from dataclasses import dataclass, fields, MISSING
from json import JSONEncoder
import logging
Expand Down Expand Up @@ -72,8 +73,10 @@ class ActivityObject:

def __init__(
self,
activity_objects: Optional[list[str, base_model.BookWyrmModel]] = None,
**kwargs: dict[str, Any],
activity_objects: Optional[
dict[str, Union[str, list[str], ActivityObject, base_model.BookWyrmModel]]
] = None,
**kwargs: Any,
):
"""this lets you pass in an object with fields that aren't in the
dataclass, which it ignores. Any field in the dataclass is required or
Expand Down
4 changes: 3 additions & 1 deletion bookwyrm/models/author.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
""" database schema for info about authors """
import re
from typing import Tuple, Any

from django.contrib.postgres.indexes import GinIndex
from django.db import models

Expand Down Expand Up @@ -38,7 +40,7 @@ class Author(BookDataModel):
)
bio = fields.HtmlField(null=True, blank=True)

def save(self, *args, **kwargs):
def save(self, *args: Tuple[Any, ...], **kwargs: dict[str, Any]) -> None:
"""normalize isni format"""
if self.isni:
self.isni = re.sub(r"\s", "", self.isni)
Expand Down
9 changes: 8 additions & 1 deletion bookwyrm/utils/cache.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
""" Custom handler for caching """
from typing import Any, Callable, Tuple, Union

from django.core.cache import cache


def get_or_set(cache_key, function, *args, timeout=None):
def get_or_set(
cache_key: str,
function: Callable[..., Any],
*args: Tuple[Any, ...],
timeout: Union[float, None] = None
jderuiter marked this conversation as resolved.
Show resolved Hide resolved
) -> Any:
jderuiter marked this conversation as resolved.
Show resolved Hide resolved
"""Django's built-in get_or_set isn't cutting it"""
value = cache.get(cache_key)
if value is None:
Expand Down
113 changes: 76 additions & 37 deletions bookwyrm/utils/isni.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
"""ISNI author checking utilities"""
import xml.etree.ElementTree as ET
from typing import Union, Optional

import requests

from bookwyrm import activitypub, models


def request_isni_data(search_index, search_term, max_records=5):
def get_element_text(element: Optional[ET.Element]) -> str:
"""If the element is not None and there is a text attribute return this"""
if element is not None and element.text is not None:
return element.text
return ""


def request_isni_data(search_index: str, search_term: str, max_records: int = 5) -> str:
"""Request data from the ISNI API"""

search_string = f'{search_index}="{search_term}"'
query_params = {
query_params: dict[str, Union[str, int]] = {
"query": search_string,
"version": "1.1",
"operation": "searchRetrieve",
Expand All @@ -26,41 +35,52 @@ def request_isni_data(search_index, search_term, max_records=5):
return result.text


def make_name_string(element):
def make_name_string(element: ET.Element) -> str:
"""create a string of form 'personal_name surname'"""

# NOTE: this will often be incorrect, many naming systems
# list "surname" before personal name
forename = element.find(".//forename")
surname = element.find(".//surname")
if forename is not None:
return "".join([forename.text, " ", surname.text])
return surname.text

forename_text = get_element_text(forename)
surname_text = get_element_text(surname)

return "".join(
[forename_text, " " if forename_text and surname_text else "", surname_text]
)


def get_other_identifier(element, code):
def get_other_identifier(element: ET.Element, code: str) -> str:
"""Get other identifiers associated with an author from their ISNI record"""

identifiers = element.findall(".//otherIdentifierOfIdentity")
for section_head in identifiers:
if (
section_head.find(".//type") is not None
and section_head.find(".//type").text == code
and section_head.find(".//identifier") is not None
(section_type := section_head.find(".//type")) is not None
and section_type.text is not None
and section_type.text == code
and (identifier := section_head.find(".//identifier")) is not None
and identifier.text is not None
):
return section_head.find(".//identifier").text
return identifier.text

# if we can't find it in otherIdentifierOfIdentity,
# try sources
for source in element.findall(".//sources"):
code_of_source = source.find(".//codeOfSource")
if code_of_source is not None and code_of_source.text.lower() == code.lower():
return source.find(".//sourceIdentifier").text
if (
(code_of_source := source.find(".//codeOfSource")) is not None
and code_of_source.text is not None
and code_of_source.text.lower() == code.lower()
and (source_identifier := source.find(".//sourceIdentifier")) is not None
and source_identifier.text is not None
):
return source_identifier.text

return ""


def get_external_information_uri(element, match_string):
def get_external_information_uri(element: ET.Element, match_string: str) -> str:
"""Get URLs associated with an author from their ISNI record"""

sources = element.findall(".//externalInformation")
Expand All @@ -69,14 +89,18 @@ def get_external_information_uri(element, match_string):
uri = source.find(".//URI")
if (
uri is not None
and uri.text is not None
and information is not None
and information.text is not None
and information.text.lower() == match_string.lower()
):
return uri.text
return ""


def find_authors_by_name(name_string, description=False):
def find_authors_by_name(
name_string: str, description: bool = False
) -> list[activitypub.Author]:
"""Query the ISNI database for possible author matches by name"""

payload = request_isni_data("pica.na", name_string)
Expand All @@ -92,7 +116,11 @@ def find_authors_by_name(name_string, description=False):
if not personal_name:
continue

author = get_author_from_isni(element.find(".//isniUnformatted").text)
author = get_author_from_isni(
get_element_text(element.find(".//isniUnformatted"))
)
if author is None:
continue

if bool(description):

Expand All @@ -111,22 +139,23 @@ def find_authors_by_name(name_string, description=False):
# some of the "titles" in ISNI are a little ...iffy
# @ is used by ISNI/OCLC to index the starting point ignoring stop words
# (e.g. "The @Government of no one")
title_elements = [
e
for e in titles
if hasattr(e, "text") and not e.text.replace("@", "").isnumeric()
]
if len(title_elements):
author.bio = title_elements[0].text.replace("@", "")
else:
author.bio = None
author.bio = ""
for title in titles:
if (
title is not None
and hasattr(title, "text")
and title.text is not None
and not title.text.replace("@", "").isnumeric()
):
author.bio = title.text.replace("@", "")
break

possible_authors.append(author)

return possible_authors


def get_author_from_isni(isni):
def get_author_from_isni(isni: str) -> Optional[activitypub.Author]:
"""Find data to populate a new author record from their ISNI"""

payload = request_isni_data("pica.isn", isni)
Expand All @@ -135,47 +164,57 @@ def get_author_from_isni(isni):
# there should only be a single responseRecord
# but let's use the first one just in case
element = root.find(".//responseRecord")
name = make_name_string(element.find(".//forename/.."))
if element is None:
return None

name = (
make_name_string(forename)
if (forename := element.find(".//forename/..")) is not None
else ""
)
viaf = get_other_identifier(element, "viaf")
# use a set to dedupe aliases in ISNI
aliases = set()
aliases_element = element.findall(".//personalNameVariant")
for entry in aliases_element:
aliases.add(make_name_string(entry))
# aliases needs to be list not set
aliases = list(aliases)
bio = element.find(".//nameTitle")
bio = bio.text if bio is not None else ""
bio = get_element_text(element.find(".//nameTitle"))
wikipedia = get_external_information_uri(element, "Wikipedia")

author = activitypub.Author(
id=element.find(".//isniURI").text,
id=get_element_text(element.find(".//isniURI")),
name=name,
isni=isni,
viafId=viaf,
aliases=aliases,
# aliases needs to be list not set
aliases=list(aliases),
bio=bio,
wikipediaLink=wikipedia,
)

return author


def build_author_from_isni(match_value):
def build_author_from_isni(match_value: str) -> dict[str, activitypub.Author]:
"""Build basic author class object from ISNI URL"""

# if it is an isni value get the data
if match_value.startswith("https://isni.org/isni/"):
isni = match_value.replace("https://isni.org/isni/", "")
return {"author": get_author_from_isni(isni)}
author = get_author_from_isni(isni)
if author is not None:
return {"author": author}
# otherwise it's a name string
return {}


def augment_author_metadata(author, isni):
def augment_author_metadata(author: models.Author, isni: str) -> None:
"""Update any missing author fields from ISNI data"""

isni_author = get_author_from_isni(isni)
if isni_author is None:
return

isni_author.to_model(model=models.Author, instance=author, overwrite=False)

# we DO want to overwrite aliases because we're adding them to the
Expand Down
2 changes: 1 addition & 1 deletion bookwyrm/utils/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class IgnoreVariableDoesNotExist(logging.Filter):
these errors are not useful to us.
"""

def filter(self, record):
def filter(self, record: logging.LogRecord) -> bool:
if record.exc_info:
(_, err_value, _) = record.exc_info
while err_value:
Expand Down
4 changes: 3 additions & 1 deletion bookwyrm/utils/validate.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Validations"""
from typing import Optional

from bookwyrm.settings import DOMAIN, USE_HTTPS


def validate_url_domain(url):
def validate_url_domain(url: str) -> Optional[str]:
"""Basic check that the URL starts with the instance domain name"""
if not url:
return None
Expand Down
3 changes: 3 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ implicit_reexport = True
[mypy-bookwyrm.connectors.*]
ignore_errors = False

[mypy-bookwyrm.utils.*]
ignore_errors = False

[mypy-bookwyrm.importers.*]
ignore_errors = False

Expand Down
17 changes: 9 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,14 @@ pytest-env==0.6.2
pytest-xdist==2.3.0
pytidylib==0.3.2
pylint==2.14.0
mypy==1.4.1
mypy==1.5.1
celery-types==0.18.0
django-stubs[compatible-mypy]==4.2.3
types-bleach==6.0.0.3
django-stubs[compatible-mypy]==4.2.4
types-bleach==6.0.0.4
types-dataclasses==0.6.6
types-Markdown==3.4.2.9
types-Pillow==10.0.0.1
types-psycopg2==2.9.21.10
types-python-dateutil==2.8.19.13
types-requests==2.31.0.1
types-Markdown==3.4.2.10
types-Pillow==10.0.0.3
types-psycopg2==2.9.21.11
types-python-dateutil==2.8.19.14
types-requests==2.31.0.2
types-requests==2.31.0.2
Loading