Skip to content

Commit

Permalink
Use django commands for data load/dump/seed with test coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
KevinMind committed Sep 23, 2024
1 parent b5fcf30 commit 15df10b
Show file tree
Hide file tree
Showing 8 changed files with 360 additions and 59 deletions.
37 changes: 10 additions & 27 deletions Makefile-docker
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ export PYTHON_COMMAND=python3
export PIP_COMMAND=$(PYTHON_COMMAND) -m pip
APP=src/olympia/

NUM_ADDONS=10
NUM_THEMES=$(NUM_ADDONS)

NODE_MODULES := $(NPM_CONFIG_PREFIX)node_modules/

REQUIRED_FILES := \
Expand Down Expand Up @@ -52,37 +49,23 @@ check_django: ## check if the django app is configured properly
.PHONY: check
check: check_files check_olympia_user check_debian_packages check_pip_packages check_django

.PHONY: data_dump
data_dump: ## run the dump_data management command; pass its options through ARGS
./manage.py dump_data $(ARGS)

.PHONY: data_load
data_load: ## run the load_data management command; pass its options through ARGS
./manage.py load_data $(ARGS)

.PHONY: initialize_db
initialize_db: ## create a new database
rm -rf ./user-media/* ./tmp/*
$(PYTHON_COMMAND) manage.py create_db --force
$(PYTHON_COMMAND) manage.py migrate --noinput
$(PYTHON_COMMAND) manage.py loaddata initial.json
$(PYTHON_COMMAND) manage.py import_prod_versions
# The superuser needs to have a mozilla.com address for admin tools access
$(PYTHON_COMMAND) manage.py createsuperuser \
--no-input \
--username "local_admin" \
--email "[email protected]"
$(PYTHON_COMMAND) manage.py loaddata zadmin/users
./manage.py seed_data

.PHONY: reindex_data
reindex_data: ## reindex the data in elasticsearch
$(PYTHON_COMMAND) manage.py reindex --force --noinput

.PHONY: populate_data
populate_data: ## populate a new database
# reindex --wipe will force the ES mapping to be re-installed. Useful to
# make sure the mapping is correct before adding a bunch of add-ons.
$(PYTHON_COMMAND) manage.py reindex --wipe --force --noinput
$(PYTHON_COMMAND) manage.py generate_addons --app firefox $(NUM_ADDONS)
$(PYTHON_COMMAND) manage.py generate_addons --app android $(NUM_ADDONS)
$(PYTHON_COMMAND) manage.py generate_themes $(NUM_THEMES)
# These add-ons are specifically useful for the addons-frontend
# homepage. You may have to re-run this, in case the data there
# changes.
$(PYTHON_COMMAND) manage.py generate_default_addons_for_frontend

.PHONY: update_db
update_db: ## run the database migrations
$(PYTHON_COMMAND) manage.py migrate --noinput
Expand Down Expand Up @@ -157,7 +140,7 @@ dbshell: ## connect to a database shell
$(PYTHON_COMMAND) ./manage.py dbshell

.PHONY: initialize
initialize: initialize_db update_assets populate_data reindex_data ## init the dependencies, the database, and assets
initialize: initialize_db update_assets reindex_data ## init the dependencies, the database, and assets

reload-uwsgi: reload

Expand Down
20 changes: 0 additions & 20 deletions Makefile-os
Original file line number Diff line number Diff line change
Expand Up @@ -75,26 +75,6 @@ shell: ## connect to a running addons-server docker shell
rootshell: ## connect to a running addons-server docker shell with root user
docker compose exec --user root web bash

.PHONY: data_export
data_export:
@ mkdir -p $(EXPORT_DIR)

# Extracting mysql database
docker compose exec mysqld /usr/bin/mysqldump olympia > $(EXPORT_DIR)/data_mysqld.sql

.PHONY: data_restore
data_restore:
@[ -d $(RESTORE_DIR) ] || (echo "Directory $(RESTORE_DIR) does not exist" && exit 1)

# Wait for MySQL server to be ready
docker compose exec mysqld bash \
-c 'while ! mysqladmin ping --silent; do echo "waiting"; sleep 1; done'

# Restoring mysql database
docker compose exec -T mysqld /usr/bin/mysql olympia < $(RESTORE_DIR)/data_mysqld.sql

$(MAKE) reindex_data

.PHONY: docker_compose_config
docker_compose_config: ## Show the docker compose configuration
@docker compose config web --format json
Expand Down
41 changes: 29 additions & 12 deletions docs/topics/development/data_management.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,30 +35,47 @@ The `make initialize` command, executed as part of `make initialize_docker`, per

## Exporting and Loading Data Snapshots

You can export and load data snapshots to manage data states across different environments or for backup purposes. The Makefile provides commands to facilitate this.
You can export and load data snapshots to manage data states across different environments or for backup purposes. The Makefile provides commands to facilitate this. These commands rely internally on Django's `loaddata` and `dumpdata` commands,
with additional logic to save and load backups in the configured backup directory.

- **Exporting Data**:
- **Data dump**:

```sh
make data_export [EXPORT_DIR=<path>]
make data_dump [ARGS="--name <name> --force"]
```

This command creates a dump of the current MySQL database. The optional `EXPORT_DIR` argument allows you to specify a custom path for the export directory.
The default value is a timestamp in the `backups` directory.
This command creates a dump of the current MySQL database. The command accepts an optional `name` argument which will determine
the name of the directory created in the `DATA_BACKUP_DIR` directory. By default it uses a timestamp to ensure uniqueness.

The data exported will be a .sql dump of the current state of the database including any data that has been added or modified.
You can also specify the `--force` argument to overwrite an existing backup with the same name.

- **Loading Data**:

```sh
make data_restore [RESTORE_DIR=<path>]
make data_load ARGS="--name <name>"
```

This command restores a MySQL database from a previously exported snapshot. The optional `RESTORE_DIR` argument allows you to specify the path of the import file.
This must be an absolute path. It defaults to the latest stored snapshot in the `backups` directory.
This command will load data from an existing backup directory. The name is required and must match a directory in the `DATA_BACKUP_DIR` directory.

Refer to the Makefile for detailed instructions on these commands.
> NOTE: This command will NOT reindex elasticsearch. In most cases you should use the `make initialize_data` command instead.
> You can specify the `--load <name>` argument to load a specific backup and ensure the index is recreated.

This comprehensive setup ensures that the development environment is fully prepared with the necessary data.
## Hard Reset Database

By following these practices, developers can manage data effectively in the **addons-server** project. The use of persistent volumes, external mounts, data snapshots, and automated data population ensures a robust and flexible data management strategy. For more detailed instructions, refer to the project's Makefile and Docker Compose configuration in the repository.
The actual mysql database is created and managed by the `mysqld` container. The database is created on container start
and the actual data is stored in a persistent data volume. This enables data to persist across container restarts.

`addons-server` assumes that a database named `olympia` already exists and most data management commands will fail
if it does not.

If you need to hard reset the database (for example, to start with a fresh state), you can use the following command:

```bash
make down && make docker_mysqld_volume_remove
```

This will stop the containers and remove the `mysqld` data volume from docker. The next time you run `make up` it will
create a new empty volume for you and mysql will recreate the database.

> NOTE: removing the data volume will remove the actual data! You can and should save a backup before doing this
> if you want to keep the data.
51 changes: 51 additions & 0 deletions src/olympia/amo/management/commands/dump_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import logging
import os
import shutil
from datetime import datetime

from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError


class Command(BaseCommand):
    """Dump the database and the storage directory into a named backup.

    The backup is written to ``<settings.DATA_BACKUP_DIR>/<name>`` and holds:

    * ``data.json`` - output of Django's ``dumpdata`` (JSON, indent 2)
    * ``storage/``  - a copy of ``settings.STORAGE_ROOT``

    Raises:
        CommandError: if the target directory already exists and ``--force``
            was not given.
    """

    help = 'Dump data with a specified name'

    def add_arguments(self, parser):
        """Register ``--name`` (defaults to a timestamp) and ``--force``."""
        parser.add_argument(
            '--name',
            type=str,
            # The timestamp is computed when the parser is built, i.e. once
            # per command invocation.
            default=datetime.now().strftime('%Y%m%d%H%M%S'),
            help='Name of the data dump',
        )
        parser.add_argument(
            '--force', action='store_true', help='Force overwrite of existing dump'
        )

    def handle(self, *args, **options):
        """Create the backup directory, then dump the database and storage."""
        name = options.get('name')
        force = options.get('force')

        dump_path = os.path.abspath(os.path.join(settings.DATA_BACKUP_DIR, name))

        logging.info(f'Dumping data to {dump_path}')

        if os.path.exists(dump_path):
            if not force:
                # The two literals are concatenated by the parser, so the
                # separating space has to be spelled out explicitly.
                # NOTE(review): this command defines no --init option; the
                # hint presumably refers to another entry point - confirm.
                raise CommandError(
                    f'Dump path {dump_path} already exists. '
                    'Use --force to overwrite or --init to reseed the initial data.'
                )
            # --force: discard the previous backup so stale files cannot
            # survive into the new one.
            shutil.rmtree(dump_path)

        os.makedirs(dump_path, exist_ok=True)

        data_file_path = os.path.join(dump_path, 'data.json')
        call_command('dumpdata', format='json', indent=2, output=data_file_path)

        storage_from = settings.STORAGE_ROOT
        storage_to = os.path.join(dump_path, 'storage')
        shutil.copytree(storage_from, storage_to)
36 changes: 36 additions & 0 deletions src/olympia/amo/management/commands/load_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import logging
import os
import shutil

from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError


class Command(BaseCommand):
    """Restore a named backup: load its fixture, then copy its storage files.

    The backup is looked up at ``<settings.DATA_BACKUP_DIR>/<name>``. Its
    ``data.json`` is passed to Django's ``loaddata`` and its ``storage``
    directory is copied over ``settings.STORAGE_ROOT``.

    Raises:
        CommandError: if the backup directory does not exist.
    """

    help = 'Load data from a specified name'

    def add_arguments(self, parser):
        """Register the mandatory ``--name`` option."""
        parser.add_argument(
            '--name', type=str, required=True, help='Name of the data dump'
        )

    def handle(self, *args, **options):
        """Load the fixture and storage files of the requested backup."""
        backup_dir = os.path.abspath(
            os.path.join(settings.DATA_BACKUP_DIR, options.get('name'))
        )
        logging.info(f'Loading data from {backup_dir}')

        # Guard clause: nothing to load when the backup was never created.
        if not os.path.exists(backup_dir):
            raise CommandError(f'Dump path {backup_dir} does not exist.')

        call_command('loaddata', os.path.join(backup_dir, 'data.json'))

        source = os.path.join(backup_dir, 'storage')
        destination = os.path.abspath(settings.STORAGE_ROOT)
        logging.info(f'Copying storage from {source} to {destination}')
        # dirs_exist_ok lets the copy write into an already existing storage
        # root instead of failing.
        shutil.copytree(source, destination, dirs_exist_ok=True)
48 changes: 48 additions & 0 deletions src/olympia/amo/management/commands/seed_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import logging
import os
import shutil

from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Rebuild the local data set from scratch and store it as the init backup.

    Steps, in order: remove the existing init backup directory, flush and
    migrate the database, wipe and rebuild the search index, load the initial
    fixtures and the local admin user, generate sample add-ons and themes,
    and finally dump the result under ``settings.DATA_BACKUP_INIT``.
    """

    help = 'Seed the _init data dir with fresh data from the database'

    def handle(self, *args, **options):
        """Run the full reset / seed / dump sequence."""
        backup_name = settings.DATA_BACKUP_INIT
        backup_path = os.path.abspath(
            os.path.join(settings.DATA_BACKUP_DIR, backup_name)
        )
        logging.info(f'Clearing {backup_path}')
        # ignore_errors: a missing init directory is not a failure.
        shutil.rmtree(backup_path, ignore_errors=True)

        logging.info('Resetting database...')
        # reindex --wipe will force the ES mapping to be re-installed. Useful
        # to make sure the mapping is correct before adding a bunch of
        # add-ons.
        for reset_step in (
            ('flush', '--noinput'),
            ('migrate', '--noinput'),
            ('reindex', '--wipe', '--force', '--noinput'),
        ):
            call_command(*reset_step)

        logging.info('Loading initial data...')
        call_command('loaddata', 'initial.json')
        call_command('import_prod_versions')
        call_command(
            'createsuperuser',
            '--no-input',
            '--username',
            settings.LOCAL_ADMIN_USERNAME,
            '--email',
            settings.LOCAL_ADMIN_EMAIL,
        )
        call_command('loaddata', 'zadmin/users')

        logging.info('Generating add-ons...')
        for app_name in ('firefox', 'android'):
            call_command('generate_addons', '--app', app_name, 10)
        call_command('generate_themes', 5)
        # These add-ons are specifically useful for the addons-frontend
        # homepage. You may have to re-run this, in case the data there
        # changes.
        call_command('generate_default_addons_for_frontend')

        logging.info(f'Dumping data to {backup_path}')
        call_command('dump_data', '--name', backup_name)
Loading

0 comments on commit 15df10b

Please sign in to comment.