Skip to content

Commit

Permalink
Add sha256 uniqueness to CollectionVersion
Browse files Browse the repository at this point in the history
This should be compatible with zero-downtime upgrades (ZDU). The transition will be
completed with the next y-release.

fixes: #1052

Co-authored-by: Matthias Dellweg <[email protected]>
  • Loading branch information
gerrod3 and mdellweg committed May 7, 2024
1 parent ca866f8 commit b09eed7
Show file tree
Hide file tree
Showing 18 changed files with 349 additions and 116 deletions.
2 changes: 2 additions & 0 deletions CHANGES/1052.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The global uniqueness constraint of CollectionVersion is now its sha256 digest. Repository-level
uniqueness is still (namespace, name, version).
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from gettext import gettext as _

from django.core.management import BaseCommand
from django.db import transaction

from pulp_ansible.app.models import CollectionVersion


class Command(BaseCommand):
    """
    Django management command to repair ansible collection versions without sha256.

    Every CollectionVersion whose ``sha256`` is NULL gets the sha256 of its artifact copied
    over. Units that cannot be repaired are reported and counted, and the command moves on
    to the next unit.
    """

    help = (
        "This script repairs ansible collection versions without sha256 if artifacts are available."
    )

    def add_arguments(self, parser):
        """Set up arguments."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help=_("Don't modify anything, just collect results."),
        )

    def handle(self, *args, **options):
        """
        Copy the artifact digest onto every CollectionVersion that has no sha256 yet.

        With ``--dry-run`` nothing is saved; the "repaired" counter then reports the units
        for which a digest could be determined.
        """
        dry_run = options["dry_run"]
        failed_units = 0
        repaired_units = 0

        unit_qs = CollectionVersion.objects.filter(sha256__isnull=True)
        count = unit_qs.count()
        # Write through self.stdout (not print) so callers of call_command() can capture
        # or redirect the output.
        self.stdout.write(f"CollectionVersions to repair: {count}")
        if count == 0:
            return

        for unit in unit_qs.iterator():
            try:
                artifact = unit.contentartifact_set.get().artifact
                if artifact is None:
                    # Without this check the failure would surface as an opaque
                    # "'NoneType' object has no attribute 'sha256'".
                    raise ValueError(_("no artifact is available for this collection version"))
                unit.sha256 = artifact.sha256

                if not dry_run:
                    with transaction.atomic():
                        unit.save(update_fields=["sha256"])
            except Exception as e:
                # Deliberately broad: this is a best-effort repair, one broken unit must not
                # prevent the remaining units from being processed.
                failed_units += 1
                self.stdout.write(
                    f"Failed to migrate collection version '{unit.namespace}.{unit.name}' "
                    f"'{unit.version}': {e}"
                )
            else:
                repaired_units += 1

        self.stdout.write(f"Successfully repaired collection versions: {repaired_units}")
        self.stdout.write(f"Collection versions failed to repair: {failed_units}")
23 changes: 23 additions & 0 deletions pulp_ansible/app/migrations/0056_collectionversion_sha256.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.2.14 on 2022-07-15 22:51

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``sha256`` column to CollectionVersion and make it unique."""

    dependencies = [
        ("ansible", "0055_alter_collectionversion_version_alter_role_version"),
    ]

    operations = [
        # No one-off default here: a default of "" would be written into every existing row,
        # and the ("sha256",) uniqueness added right below would then fail as soon as the
        # table holds more than one row. Leaving the column NULL for existing rows also
        # matches the repair command, which selects rows with sha256 IS NULL.
        # The resulting model state is the same as before (a default declared with
        # preserve_default=False is not kept in the state either).
        migrations.AddField(
            model_name="collectionversion",
            name="sha256",
            field=models.CharField(max_length=64, null=True),
        ),
        # NOTE(review): relies on the database not treating NULLs as equal in a unique
        # constraint (the PostgreSQL default) - confirm for the supported backends.
        migrations.AlterUniqueTogether(
            name="collectionversion",
            unique_together={("sha256",), ("namespace", "name", "version")},
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Generated by Django 3.2.14 on 2022-07-21 22:35
from django.db import migrations


def add_sha256_to_current_models(apps, schema_editor):
    """Adds the sha256 to current CollectionVersion models.

    Every CollectionVersion whose ``sha256`` is the empty string gets the sha256 of its
    artifact. Updates are written in batches of 1024 rows.

    Args:
        apps: Historical app registry handed in by ``RunPython``.
        schema_editor: Unused; part of the ``RunPython`` callable signature.

    Raises:
        Exception: If any matching collection version has a content artifact without an
            artifact (on-demand content); the message lists the affected primary keys.
    """
    CollectionVersion = apps.get_model("ansible", "CollectionVersion")
    collection_versions_to_update = []
    collection_versions_on_demand = []

    def flush_updates():
        """Write the pending sha256 values to the database and empty the batch."""
        if collection_versions_to_update:
            CollectionVersion.objects.bulk_update(collection_versions_to_update, ["sha256"])
            collection_versions_to_update.clear()

    # No prefetch_related() here: combined with iterator() and no chunk_size it is ignored by
    # older Django versions and rejected with a ValueError by Django 5.0+, and the
    # related-manager .get() below issues its own query regardless of any prefetch.
    queryset = CollectionVersion.objects.filter(sha256="").only("pk", "sha256")
    for collection_version in queryset.iterator():
        content_artifact = collection_version.contentartifact_set.get()
        if content_artifact.artifact:
            collection_version.sha256 = content_artifact.artifact.sha256
            collection_versions_to_update.append(collection_version)
        else:
            collection_versions_on_demand.append(collection_version)
        if len(collection_versions_to_update) >= 1024:
            flush_updates()
    # Update remaining collection versions
    flush_updates()

    # On-demand collections cannot be given a digest here, so stop with a helpful message on
    # how to fix it. Rows that were already updated no longer match the sha256="" filter.
    # NOTE(review): if this migration runs inside a transaction, the updates above are rolled
    # back by this exception and will be redone on the next run - confirm.
    if collection_versions_on_demand:
        raise Exception(
            f"On demand collections found. Please remove or upload/sync their data: "
            f"{[c.pk for c in collection_versions_on_demand]}"
        )


class Migration(migrations.Migration):
    """Data migration that runs ``add_sha256_to_current_models``; reversing it is a no-op."""

    dependencies = [("ansible", "0056_collectionversion_sha256")]

    operations = [
        migrations.RunPython(
            code=add_sha256_to_current_models,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
5 changes: 4 additions & 1 deletion pulp_ansible/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ class CollectionVersion(Content):
"""

TYPE = "collection_version"
repo_key_fields = ("name", "namespace", "version")

# Data Fields
authors = psql_fields.ArrayField(models.CharField(max_length=64), default=list, editable=False)
Expand All @@ -178,6 +179,7 @@ class CollectionVersion(Content):
namespace = models.CharField(max_length=64, editable=False)
repository = models.CharField(default="", blank=True, max_length=2000, editable=False)
requires_ansible = models.CharField(null=True, max_length=255)
sha256 = models.CharField(max_length=64, null=True, blank=False)

version = models.CharField(max_length=128, db_collation="pulp_ansible_semver")
version_major = models.IntegerField()
Expand Down Expand Up @@ -227,7 +229,7 @@ def __str__(self):

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
unique_together = ("namespace", "name", "version")
unique_together = (("namespace", "name", "version"), ("sha256",))
constraints = [
UniqueConstraint(
fields=("collection", "is_highest"),
Expand Down Expand Up @@ -533,6 +535,7 @@ class Meta:

def finalize_new_version(self, new_version):
"""Finalize repo version."""
remove_duplicates(new_version)
removed_collection_versions = new_version.removed(
base_version=new_version.base_version
).filter(pulp_type=CollectionVersion.get_pulp_type())
Expand Down
24 changes: 6 additions & 18 deletions pulp_ansible/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,8 +490,10 @@ def deferred_validate(self, data):
# Call super to ensure that data contains artifact
data = super().deferred_validate(data)
artifact = data.get("artifact")
if (sha256 := data.pop("sha256", None)) and sha256 != artifact.sha256:
if (sha256 := data.get("sha256")) and sha256 != artifact.sha256:
raise ValidationError(_("Expected sha256 did not match uploaded artifact's sha256"))
else:
data["sha256"] = artifact.sha256

collection_info = process_collection_artifact(
artifact=artifact,
Expand All @@ -509,23 +511,7 @@ def deferred_validate(self, data):

def retrieve(self, validated_data):
"""Reuse existing CollectionVersion if provided artifact matches."""
namespace = validated_data["namespace"]
name = validated_data["name"]
version = validated_data["version"]
artifact = self.context["artifact"]
# TODO switch this check to use digest when ColVersion uniqueness constraint is changed
col = CollectionVersion.objects.filter(
namespace=namespace, name=name, version=version
).first()
if col:
if col._artifacts.get() != artifact:
raise ValidationError(
_("Collection {}.{}-{} already exists with a different artifact").format(
namespace, name, version
)
)

return col
return CollectionVersion.objects.filter(sha256=validated_data["sha256"]).first()

def create(self, validated_data):
"""Final step in creating the CollectionVersion."""
Expand All @@ -549,6 +535,8 @@ class Meta:
"expected_version",
)
model = CollectionVersion
# There was an autogenerated validator rendering sha256 required.
validators = []


class CollectionVersionSerializer(ContentChecksumSerializer, CollectionVersionUploadSerializer):
Expand Down
Loading

0 comments on commit b09eed7

Please sign in to comment.