diff --git a/jupyter_server/__init__.py b/jupyter_server/__init__.py index d5b97f0c90..25d09ee20e 100644 --- a/jupyter_server/__init__.py +++ b/jupyter_server/__init__.py @@ -1,5 +1,6 @@ """The Jupyter Server""" import os +import pathlib import subprocess import sys @@ -10,6 +11,8 @@ ] DEFAULT_JUPYTER_SERVER_PORT = 8888 +JUPYTER_SERVER_EVENTS_URI = "https://events.jupyter.org/jupyter_server" +DEFAULT_EVENTS_SCHEMA_PATH = pathlib.Path(__file__).parent / "event_schemas" del os diff --git a/jupyter_server/event_schemas/contents_service/v1.yaml b/jupyter_server/event_schemas/contents_service/v1.yaml new file mode 100644 index 0000000000..2c574f7b93 --- /dev/null +++ b/jupyter_server/event_schemas/contents_service/v1.yaml @@ -0,0 +1,73 @@ +"$id": https://events.jupyter.org/jupyter_server/contents_service/v1 +version: 1 +title: Contents Manager activities +personal-data: true +description: | + Record actions on files via the ContentsManager. + + The notebook ContentsManager REST API is used by all frontends to retrieve, + save, list, delete and perform other actions on notebooks, directories, + and other files through the UI. This is pluggable - the default acts on + the file system, but can be replaced with a different ContentsManager + implementation - to work on S3, Postgres, other object stores, etc. + The events get recorded regardless of the ContentsManager implementation + being used. + + Limitations: + + 1. This does not record all filesystem access, just the ones that happen + explicitly via the notebook server's REST API. Users can (and often do) + trivially access the filesystem in many other ways (such as `open()` calls + in their code), so this is usually never a complete record. + 2. As with all events recorded by the notebook server, users most likely + have the ability to modify the code of the notebook server. Unless other + security measures are in place, these events should be treated as user + controlled and not used in high security areas. + 3. 
def init_event_logger(self):
    """Initialize the Event Bus.

    Creates the application's ``EventLogger`` and registers every core
    Jupyter Server event schema. Each schema file is located by turning
    its schema ID into a path relative to ``DEFAULT_EVENTS_SCHEMA_PATH``.

    Raises:
        ValueError: If a core schema ID does not start with
            ``JUPYTER_SERVER_EVENTS_URI``.
    """
    self.event_logger = EventLogger(parent=self)
    # Load the core Jupyter Server event schemas.
    # All event schemas must start with Jupyter Server's
    # events URI, `JUPYTER_SERVER_EVENTS_URI`.
    schema_ids = [
        "https://events.jupyter.org/jupyter_server/contents_service/v1",
    ]
    for schema_id in schema_ids:
        if not schema_id.startswith(JUPYTER_SERVER_EVENTS_URI):
            raise ValueError(
                f"Event schema ID {schema_id!r} does not start with "
                f"{JUPYTER_SERVER_EVENTS_URI!r}"
            )
        # Remove the URI as a *prefix*. `str.lstrip(chars)` would treat the
        # URI as a set of characters and could also eat the beginning of the
        # schema name (e.g. ".../sessions/v1" would become "ions/v1").
        suffix = schema_id[len(JUPYTER_SERVER_EVENTS_URI):]
        # Strip the leading "/" so the pathlib join below stays relative to
        # DEFAULT_EVENTS_SCHEMA_PATH instead of resolving to an absolute path.
        rel_schema_path = suffix.lstrip("/") + ".yaml"
        schema_path = DEFAULT_EVENTS_SCHEMA_PATH / rel_schema_path
        # Register the schema from its pathlib path.
        self.event_logger.register_event_schema(schema_path)
@default("event_logger")
def _default_event_logger(self):
    """Provide the default value for the ``event_logger`` trait.

    Returns the parent's ``event_logger`` when a parent is set and has
    that attribute. Otherwise a new ``EventLogger`` is created, the
    contents service v1 schema is registered on it, and it is returned.
    """
    owner = self.parent
    if not (owner and hasattr(owner, "event_logger")):
        # No parent logger to share: build a standalone one that knows
        # the contents service schema.
        standalone = EventLogger()
        standalone.register_event_schema(
            DEFAULT_EVENTS_SCHEMA_PATH / "contents_service" / "v1.yaml"
        )
        return standalone
    return owner.event_logger


def emit(self, data):
    """Emit event using the core event schema from Jupyter Server's Contents Manager."""
    bus = self.event_logger
    bus.emit(schema_id=self.event_schema_id, data=data)
self.emit(data={"action": "copy", "path": to_path, "source_path": from_path}) return model async def trust_notebook(self, path):