diff --git a/doc/ast-traversal.mld b/doc/ast-traversal.mld new file mode 100644 index 00000000..ac7f1910 --- /dev/null +++ b/doc/ast-traversal.mld @@ -0,0 +1,114 @@ +{%html:
%}{{!"matching-code"}< Destructing AST nodes}{%html:
%}{{!"good-practices"}Good practices >}{%html:
%} + +{0 AST Traversals} + +The {{!Ppxlib.Parsetree}[Parsetree]} is a very complex type. Other {!Ppxlib} modules such as +{{!Ppxlib_metaquot}[Metaquot]}, {{!Ppxlib.Ast_builder}[Ast_builder]} and {{!Ppxlib.Ast_pattern}[Ast_pattern]} help in generating and matching values, +but only when the overall structure of the code is known in advance. + +For other use cases, such as extracting all identifiers, checking that a +property is verified, or replacing all integer constants by something else, +those modules cannot really help. All these examples relate to another kind +of {{!Ppxlib.Parsetree}[Parsetree]} manipulation known as traversals. + +A traversal is a recursive function that will be called on a value, and recursively on all +of its subvalues, combining the results in a certain way. For instance, {{!Stdlib.List.map}[List.map]} is a traversal of the +[list] type. In the case of a [list], a map is very simple to write, but in the +case of the long {{!Ppxlib.Parsetree}[Parsetree]} type, it is a lot of boilerplate code! Fortunately, +{{!Ppxlib}[ppxlib]} provides a way to ease this. + +In [ppxlib], traversals are implemented using the "visitor" object-oriented pattern. + +{1 Writing Traverses} + +For each kind of traversal (described below), [ppxlib] provides a "default" traversal, +in the form of a class following the visitors pattern. For instance, in the case of the map traversal, the +default map is the identity AST map, and any object of class {{!Ppxlib.Ast_traverse.map}[Ast_traverse.map]} +will be this identity map. To apply a map to a node of a given type, one needs +to call the appropriate method: + +{[ + # let f payload = + let map = new Ppxlib.Ast_traverse.map in + map#payload payload ;; + val f : payload -> payload = <fun> +]} + +In the example above, [f] is the identity map. But we want to define proper maps, +not just identity. This is done by creating a new class, making it inherit the +methods, and replacing the one that we want to replace. 
Here is an example, for +both the [iter] and [map] traversals: + +{[ +let f payload = + let checker = + object + inherit Ast_traverse.iter as super + + method! extension ext = + match ext with + | { txt = "forbidden"; _ }, _ -> + failwith "Forbidden extension nodes are forbidden!" + | _ -> super#extension ext (* Continue traversing inside the node *) + end + in + let replace_constant = + object + inherit Ast_traverse.map + method! int i = i + 1 + end + in + checker#payload payload; + replace_constant#payload payload +]} + +Note that when redefining methods, unless explicitly wanting the traversal to +stop, the original method needs to be called! That should be all that’s necessary to +know and understand the {{!Ppxlib.Ast_traverse.map}API}. + +{1 The Different Kinds of Traversals} + +{{!Ppxlib}[ppxlib]} offers different kinds of {{!Ppxlib.Parsetree}[Parsetree]} traversals: + +- {{!Ppxlib.Ast_traverse.iter}Iterators}, which will traverse the type, calling + a function on each node for side effects. + +- {{!Ppxlib.Ast_traverse.map}Maps}, where the content is replaced. A map will + transform a [Parsetree] into another [Parsetree], replacing nodes following the + map function. + +- {{!Ppxlib.Ast_traverse.fold}Folds}, which will traverse the nodes, carrying a + value (often called an accumulator) that will be updated on each node. + +- {{!Ppxlib.Ast_traverse.lift}Lifts}, a transformation that turns a [Parsetree] value into one of another type by + transforming it in a bottom-up manner. For instance, with a simple tree + structure, the corresponding [lift] function would be: + +{[ + let rec lift ~f = function + Leaf a -> f.leaf a + | Node(a,x,y) -> f.node a (lift ~f x) (lift ~f y) +]} + +- Combinations of two traversals, such as + {{!Ppxlib.Ast_traverse.fold_map}Fold-maps} and + {{!Ppxlib.Ast_traverse.lift_map_with_context}Lift-maps}. 
+ +- Variants of the above traversals, such as + {{!Ppxlib.Ast_traverse.map_with_context}Maps with context}, where a context + can be modified and passed down to child nodes during traversal. The context + never goes up; it is only propagated down. It is used for instance to track + opened modules. To give a simple example, such a context could be the depth of + the current node, as in the following implementation for the simple tree type: + +{[ + let rec map_with_depth_context ~f ctxt = function + Leaf a -> f.leaf ctxt a + | Node(a,x,y) -> + f.node ctxt a + (map_with_depth_context (ctxt+1) ~f x) + (map_with_depth_context (ctxt+1) ~f y) +]} + + +{%html:
%}{{!"matching-code"}< Destructing AST nodes}{%html:
%}{{!"good-practices"}Good practices >}{%html:
%} diff --git a/doc/driver.mld b/doc/driver.mld new file mode 100644 index 00000000..10a674c6 --- /dev/null +++ b/doc/driver.mld @@ -0,0 +1,376 @@ +{%html:
%}{{!"quick_intro"}< Introduction}{%html:
%}{{!"writing-ppxs"}Writing PPXs >}{%html:
%} + +{0 How It Works} + +{1 General Concepts} + +{2 The Driver} + +[ppxlib] sits in between the PPX authors and the compiler toolchain. For the PPX +author, it provides an API to define the transformation and register it with +[ppxlib]. Then, all registered transformations can be turned into a single +executable, called the {e driver}, that is responsible for applying all the +transformations. The driver will be called by the compiler. + +The PPX authors register their transformations using the +{{!Ppxlib.Driver.register_transformation}[Driver.register_transformation]} function, as explained in the +{{!"writing-ppxs"}Writing PPXs} section. The different arguments of this function +correspond to the {{!"derivers-and-extenders"}different kinds} of PPXs supported by +[ppxlib] or to the {{!driver_execution}phase} at which they will be executed. + +The driver is created by calling either {{!Ppxlib.Driver.standalone}[Driver.standalone]} or +{{!Ppxlib.Driver.run_as_ppx_rewriter}[Driver.run_as_ppx_rewriter]}. Note that when used through Dune, none of +these functions will need to be called by the PPX author. As we will see, Dune +will be responsible for generating the driver after all required PPXs from +different libraries have been registered. These functions will interpret the +command line arguments and start the rewriting accordingly. + +The {{!Ppxlib.Driver.standalone}[Driver.standalone]} function creates an executable that +parses an OCaml file, transforms it according to the registered transformations, +and outputs the transformed file. This makes it suitable for use with the [-pp] +{{:https://v2.ocaml.org/releases/5.0/htmlman/comp.html#s:comp-options}option} of the OCaml compiler. It is a preprocessor for sources and is +standalone in the sense that it can be called independently from the OCaml +compiler (e.g., it includes an OCaml parser). 
+ +On the other hand, the +{{!Ppxlib.Driver.run_as_ppx_rewriter}[Driver.run_as_ppx_rewriter]}-generated driver is a +proper PPX, as it will read and output a {{!Ppxlib.Parsetree}[Parsetree]} marshalled +value directly. This version is suitable for use with the [-ppx] {{:https://v2.ocaml.org/releases/5.0/htmlman/comp.html#s:comp-options}option} of the OCaml +compiler, as well as any tool that requires control of parsing the file. +For instance, {{:https://merlin.com}Merlin} includes an OCaml parser that tries +hard to recover from errors in order to generate a valid AST most of the time. + +Several arguments can be passed to the driver when executing it. Those arguments +can also be easily passed using Dune, as explained in its +{{:https://dune.readthedocs.io/en/stable/concepts.html#preprocessing-with-ppx-rewriters}manual}. +PPX authors can add arguments to their generated drivers using {{!Ppxlib.Driver.add_arg}[Driver.add_arg]}. Here are the default arguments for respectively +{{!Ppxlib.Driver.standalone}[standalone]} and +{{!Ppxlib.Driver.run_as_ppx_rewriter}[run_as_ppx_rewriter]} generated drivers: + +{%html:
Standalone driver%} +{v +driver.exe [extra_args] [] + -as-ppx Run as a -ppx rewriter (must be the first argument) + --as-ppx Same as -as-ppx + -as-pp Shorthand for: -dump-ast -embed-errors + --as-pp Same as -as-pp + -o Output file (use '-' for stdout) + - Read input from stdin + -dump-ast Dump the marshaled ast to the output file instead of pretty-printing it + --dump-ast Same as -dump-ast + -dparsetree Print the parsetree (same as ocamlc -dparsetree) + -embed-errors Embed errors in the output AST (default: true when -dump-ast, false otherwise) + -null Produce no output, except for errors + -impl Treat the input as a .ml file + --impl Same as -impl + -intf Treat the input as a .mli file + --intf Same as -intf + -debug-attribute-drop Debug attribute dropping + -print-transformations Print linked-in code transformations, in the order they are applied + -print-passes Print the actual passes over the whole AST in the order they are applied + -ite-check (no effect -- kept for compatibility) + -pp Pipe sources through preprocessor (incompatible with -as-ppx) + -reconcile (WIP) Pretty print the output using a mix of the input source and the generated code + -reconcile-with-comments (WIP) same as -reconcile but uses comments to enclose the generated code + -no-color Don't use colors when printing errors + -diff-cmd Diff command when using code expectations (use - to disable diffing) + -pretty Instruct code generators to improve the prettiness of the generated code + -styler Code styler + -output-metadata FILE Where to store the output metadata + -corrected-suffix SUFFIX Suffix to append to corrected files + -loc-filename File name to use in locations + -reserve-namespace Mark the given namespace as reserved + -no-check Disable checks (unsafe) + -check Enable checks + -no-check-on-extensions Disable checks on extension point only + -check-on-extensions Enable checks on extension point only + -no-locations-check Disable locations check only + -locations-check Enable 
locations check only + -apply Apply these transformations in order (comma-separated list) + -dont-apply Exclude these transformations + -no-merge Do not merge context free transformations (better for debugging rewriters). As a result, the context-free transformations are not all applied before all impl and intf. + -cookie NAME=EXPR Set the cookie NAME to EXPR + --cookie Same as -cookie + -help Display this list of options + --help Display this list of options +v} +{%html:
%} + +and + +{%html:
Ppx rewriter driver%} +{v +driver.exe [extra_args] + -loc-filename File name to use in locations + -reserve-namespace Mark the given namespace as reserved + -no-check Disable checks (unsafe) + -check Enable checks + -no-check-on-extensions Disable checks on extension point only + -check-on-extensions Enable checks on extension point only + -no-locations-check Disable locations check only + -locations-check Enable locations check only + -apply Apply these transformations in order (comma-separated list) + -dont-apply Exclude these transformations + -no-merge Do not merge context free transformations (better for debugging rewriters). As a result, the context-free transformations are not all applied before all impl and intf. + -cookie NAME=EXPR Set the cookie NAME to EXPR + --cookie Same as -cookie + -help Display this list of options + --help Display this list of options +v} +{%html:
%} + +{2 Cookies} + +Cookies are values that are passed to the driver via the command line, or set as +side effects of transformations, which can be accessed by the +transformations. They have a name to identify them and a value consisting of an +OCaml expression. The module to access cookies is {{!Ppxlib.Driver.Cookies}[Driver.Cookies]}. + +{2 Integration With Dune} + +The {{:https://dune.build}Dune} build system is well integrated with the [ppxlib] +mechanism of registering transformations. In every [dune] file, Dune will read +the set of PPXs that are to be used (i.e. the PPXs in `(preprocess (pps ))`). For a given set of rewriters, it will +generate a driver using {{!Ppxlib.Driver.run_as_ppx_rewriter}[Driver.run_as_ppx_rewriter]} that contains all registered +transformations. Using a single driver for multiple +transformations from multiple PPXs ensures better composition semantics and +improves the speed of the combined transformations. +Moreover, [ppxlib] communicates with Dune through [.corrected] files to allow for +promotion, for instance when using {{!"writing-ppxs"."inlining-transformations"}[[@@deriving_inline]]}. A PPX author can also +generate its own promotion suggestion using the +{{!Ppxlib.Driver.register_correction}[Driver.register_correction]} function. + +{1:compat_mult_ver Compatibility With Multiple OCaml Versions} + +One of the important issues with working with the +{{!Ppxlib.Parsetree}[Parsetree]} is that the API is not stable. For instance, in +the {{:https://ocaml.org/releases/4.13.0}OCaml 4.13 release}, the following +{{:https://github.com/ocaml/ocaml/pull/9584/files#diff-ebecf307cba2d756cc28f0ec614dfc57d3adc6946eb4faa9825eb25a92b2596d}two} +{{:https://github.com/ocaml/ocaml/pull/10133/files#diff-ebecf307cba2d756cc28f0ec614dfc57d3adc6946eb4faa9825eb25a92b2596d}changes} +were made to the {{!Ppxlib.Parsetree}[Parsetree]} type. 
Although they are small changes, they may +break any PPX that is written to directly manipulate the (evolving) type. + +This instability causes an issue with maintenance. PPX authors wish to maintain +a single version of their PPX, not one per OCaml version, and ideally not have +to update their code when an irrelevant (for them) field is changed in the +{{!Ppxlib.Parsetree}[Parsetree]}. + +[ppxlib] helps to solve both issues. The first one, having to maintain a single +PPX version working for every OCaml version, is done by migrating the +{{!Ppxlib.Parsetree}[Parsetree]}. The PPX author only maintains a version +working with the latest version, and the [ppxlib] driver will convert the values from one version to another. + +For example, say a deriver is applied in the context of OCaml 4.08. After the +4.08 {{!Ppxlib.Parsetree}[Parsetree]} has been given to it, the [ppxlib] driver +will migrate this value into the latest {{!Ppxlib.Parsetree}[Parsetree]} +version, using the {!Astlib} module. The "latest" here depends on the version of +[ppxlib], but at any given time, the latest released version of [ppxlib] will always +use the latest released version of the {{!Ppxlib.Parsetree}[Parsetree]}. + +After the migration to the latest {{!Ppxlib.Parsetree}[Parsetree]}, +the driver runs all transformations on it, which ends with a rewritten +{{!Ppxlib.Parsetree}[Parsetree]} of the latest version. However, since the +context of rewriting is OCaml 4.08 (in this example), the driver needs to +migrate back the rewritten {{!Ppxlib.Parsetree}[Parsetree]} to an OCaml 4.08 +version. Again, [ppxlib] uses the {!Astlib} module for this migration. Once the +driver has rewritten the AST for OCaml 4.08, the compilation can continue as usual. + +{1:derivers-and-extenders Context-Free Transformations} + +[ppxlib] defines several kinds of transformations whose core property is that they +can only read and modify the code locally. 
The parts of the AST given +to the transformation are only portions of the whole AST. In this regard, they +are usually called {e context-free} transformations. While being not as +general-purpose as plain AST transformations, they are more often than not +sufficient and have many nice properties such as a well-defined semantics for +composition. +The two most important context-free transformations are {e derivers} and +{e extenders}. + +{2:def_derivers Derivers} + +A {e deriver} is a context-free transformation that, given a certain structure or +signature item, will generate code {e to append after} this item. The given code is +never modified. A deriver can be very useful to generate values depending on the +structure of a user-defined type, for instance a converter for a type +to and from a JSON value. A deriver is triggered by adding an +{{:https://v2.ocaml.org/manual/attributes.html}attribute} to a structure or +signature item. For instance, the following code: + +{@ocaml[ + type ty = Int of int | Float of float [@@deriving yojson] + + let x = ... +]} + +would be rewritten to: + +{@ocaml[ + type ty = Int of int | Float of float [@@deriving yojson] + + let ty_of_yojson = ... + let ty_to_yojson = ... + + let x = ... +]} + +{2:def_extenders Extenders} + +An {e extender} is a context-free transformation that is triggered on +{{:https://v2.ocaml.org/manual/extensionnodes.html}extension nodes}, and that +will replace the extension node by some code generated from the extension node's payload. This can be very useful to generate values of a DSL using a more +user-friendly syntax, e.g., to generate OCaml values from the JSON +syntax. 
+ +For instance, the following code: + +{@ocaml[ + let json = + [%yojson + [ { name = "Anne"; grades = ["A"; "B-"; "B+"] } + ; { name = "Bernard"; grades = ["B+"; "A"; "B-"] } + ] + ] +]} + +could be rewritten into: + +{@ocaml[ + let json = + `List + [ `Assoc + [ ("name", `String "Anne") + ; ("grades", `List [`String "A"; `String "B-"; `String "B+"]) + ] + ; `Assoc + [ ("name", `String "Bernard") + ; ("grades", `List [`String "B+"; `String "A"; `String "B-"]) + ] + ] +]} + +{2 Advantages} + +There are multiple advantages of using context-free transformations. First, they +provide the PPX user a much clearer understanding of the AST parts that +will be rewritten, rather than a fully general AST rewriting. Secondly, they provide +a much better composition semantic, which does not depend on the order. Finally, +context-free transformations are applied in a single phase factorising the work +for all transformations, resulting in a much faster driver than when combining +multiple, whole AST transformations. More details on the execution of this phase +are given in its {{!"context-free-phase"}dedicated section.} + +See the {{!"writing-ppxs"}Writing PPXs} section for how to define derivers and +extenders. + +{1:driver_execution The Execution of the Driver} + +The actual rewriting of the AST is done in multiple phases: + +{ol +{- The linting phase} +{- The preprocessing phase} +{- The first instrumentation phase} +{- The context-free phase} +{- The global transformation phase} +{- The last instrumentation phase} +} + +When registering a transformation through the +{{!Ppxlib.Driver.register_transformation}[Driver.register_transformation]} function, the phase in which the +transformation has to be applied is specified. The multiplicity of phases is +mostly to account for potential constraints on the execution order. 
However, +most of the time there are no such constraints, and in this case, either the +{{!"context-free-phase"}context-free} or the +{{!"global-transfo-phase"}global transformation phase} should be used. (Note that +context-free transformations are preferable whenever possible, which should be +almost always.) If you register in another phase, be sure to know what +you are doing. + +{2 The Linter Phase} + +Linters are preprocessors that take as input the whole AST and output a list +of "lint" errors. Such an error is of type {{!Ppxlib.Driver.Lint_error.t}[Driver.Lint_error.t]} and includes a +string (the error message) and the location of the error. The errors will be +reported as preprocessor warnings. + +This is the first phase, so linting errors can only be reported for code +handwritten by the user. + +An example of a PPX registered in this phase is +{{:https://github.com/janestreet/ppx_js_style}ppx_js_style}. + +{2 The Preprocessing Phase} + +The preprocessing phase is the first transformation that actually alters the +AST. In fact, the property of being the "first transformation applied" is what +defines this phase, and [ppxlib] will thus ensure that only one transformation is +registered in this phase; otherwise, it will generate an error. + +You should only register a transformation in this phase if it is really strongly +necessary, and you know what you are doing. Your PPX will not be usable at the +same time as another one registering a transformation in this phase. + +An example of a PPX registered in this phase is +{{:https://github.com/thierry-martinez/metapp}metapp}. + +{2 The First Instrumentation Phase} + +This phase is for transformations that {e need} to be run before the +context-free phase. Historically, it was meant for +{{:https://en.wikipedia.org/wiki/Instrumentation_(computer_programming)}instrumentation}-related +PPXs, hence the name. 
Unlike the {{!"the-preprocessing-phase"}preprocessing +phase}, registering to this phase provides no guarantee that the transformation +is run early in the rewriting, as there is no limit on the number of +transformations registered in this phase, which are then applied in +alphabetical order by name. + +If it is not crucial for a transformation to run before the context-free +phase, it should be registered to the {{!"global-transfo-phase"}global +transformation phase}. + +{2:context-free-phase The Context-Free Phase} + +The execution of all registered context-free rules is done in a single top-down +pass through the AST. Whenever the top-down pass encounters a situation +that triggers rewriting, the corresponding transformation is called. For instance, +when encountering an extension point corresponding to a rewriting rule, the +extension point is replaced by the result of the rule's execution, and the top-down pass +continues inside the generated code. Similarly, when a deriving attribute is +found attached to a structure or signature item, the result of the +deriving rule’s application is appended to the AST, and the top-down pass +continues in the generated code. + +Note that the code generation for derivers is applied when "leaving" the AST +node, that is when all rewriters have been run. Indeed, a deriver like this: + +{[ + type t = [%my_type] [@@deriving deriver_from_type] +]} + +would need the information generated by the [my_type] extender to match on the +structure of [t]. + +Also note that in this phase, the executions of the context-free rules are +intertwined, and it would not make sense to speak about the order of +application, contrary to the next phase. + +{2:global-transfo-phase The Global Transformation Phase} + +The global transformation phase is the phase where registered transformations, +seen as functions from and to the {{!Ppxlib.Parsetree}[Parsetree]}, are run. 
The order of application might matter and change the outcome, but since [ppxlib] knows nothing +about the transformations, the order applied is alphabetical by the transformation's name. + +{2 The Last Instrumentation Phase} + +This phase is for global transformations that need to escape the alphabetical order and be +executed last. For instance, {{:https://github.com/aantron/bisect_ppx}[bisect_ppx]} +needs to be executed after all rewriting has occurred. + +Note that only one global transformation can be executed last. If several +transformations rely on being the last transformation, it will be true for only +one of them. Thus, only register your transformation in this phase if it is +absolutely vital to be the last transformation, as your PPX will become +incompatible with any other that registers a transformation during this phase. + +{%html:
%}{{!"quick_intro"}< Introduction}{%html:
%}{{!"writing-ppxs"}Writing PPXs >}{%html:
%} diff --git a/doc/examples.mld b/doc/examples.mld new file mode 100644 index 00000000..ca72780e --- /dev/null +++ b/doc/examples.mld @@ -0,0 +1,167 @@ +{%html:
%}{{!"good-practices"}< Good practices}{%html:
%}{%html:
%} + +{0 Examples} + +This section is here to allow viewing complete examples of PPXs written using [ppxlib] directly in the documentation. However, they are not "complete" in the sense that the overall organization, such as the [dune] files, is not included. + +In order to see a fully working complete example of a PPX written using [ppxlib], that you can compile, modify and test, go to the {{:https://github.com/ocaml-ppx/ppxlib/tree/main/examples}examples} folder of ppxlib sources. + +{1 [ppx_deriving_accessors]} + +The fully complete, ready-to-compile [ppx_deriving_accessors] example is accessible in [ppxlib]'s {{:https://github.com/ocaml-ppx/ppxlib/tree/main/examples/simple-deriver}sources}. + +This deriver will generate accessors for record fields, from the record type +definition. + +For example, this code: + +{@ocaml[ +type t = + { a : string + ; b : int + } + [@@deriving accessors] +]} + +will generate the following, appended after the type definition: + +{@ocaml[ +let a x = x.a +let b x = x.b +]} + +The entire code is: + +{@ocaml[ +open Ppxlib +module List = ListLabels +open Ast_builder.Default + +let accessor_impl (ld : label_declaration) = + let loc = ld.pld_loc in + pstr_value ~loc Nonrecursive + [ + { + pvb_pat = ppat_var ~loc ld.pld_name; + pvb_expr = + pexp_fun ~loc Nolabel None + (ppat_var ~loc { loc; txt = "x" }) + (pexp_field ~loc + (pexp_ident ~loc { loc; txt = lident "x" }) + { loc; txt = lident ld.pld_name.txt }); + pvb_attributes = []; + pvb_loc = loc; + }; + ] + +let accessor_intf ~ptype_name (ld : label_declaration) = + let loc = ld.pld_loc in + psig_value ~loc + { + pval_name = ld.pld_name; + pval_type = + ptyp_arrow ~loc Nolabel + (ptyp_constr ~loc { loc; txt = lident ptype_name.txt } []) + ld.pld_type; + pval_attributes = []; + pval_loc = loc; + pval_prim = []; + } + +let generate_impl ~ctxt (_rec_flag, type_declarations) = + let loc = Expansion_context.Deriver.derived_item_loc ctxt in + List.map type_declarations ~f:(fun (td : 
type_declaration) -> + match td with + | { + ptype_kind = Ptype_abstract | Ptype_variant _ | Ptype_open; + ptype_loc; + _; + } -> + let ext = + Location.error_extensionf ~loc:ptype_loc + "Cannot derive accessors for non record types" + in + [ Ast_builder.Default.pstr_extension ~loc ext [] ] + | { ptype_kind = Ptype_record fields; _ } -> + List.map fields ~f:accessor_impl) + |> List.concat + +let generate_intf ~ctxt (_rec_flag, type_declarations) = + let loc = Expansion_context.Deriver.derived_item_loc ctxt in + List.map type_declarations ~f:(fun (td : type_declaration) -> + match td with + | { + ptype_kind = Ptype_abstract | Ptype_variant _ | Ptype_open; + ptype_loc; + _; + } -> + let ext = + Location.error_extensionf ~loc:ptype_loc + "Cannot derive accessors for non record types" + in + [ Ast_builder.Default.psig_extension ~loc ext [] ] + | { ptype_kind = Ptype_record fields; ptype_name; _ } -> + List.map fields ~f:(accessor_intf ~ptype_name)) + |> List.concat + +let impl_generator = Deriving.Generator.V2.make_noarg generate_impl +let intf_generator = Deriving.Generator.V2.make_noarg generate_intf + +let my_deriver = + Deriving.add "accessors" ~str_type_decl:impl_generator + ~sig_type_decl:intf_generator +]} + +{1 [ppx_get_env]} + +The fully complete, ready-to-compile [ppx_get_env] example is accessible in [ppxlib]'s {{:https://github.com/ocaml-ppx/ppxlib/tree/main/examples/simple-extension-rewriter}sources}. + +A PPX rewriter that will expand [[%get_env "SOME_ENV_VAR"]] into the value of the +env variable [SOME_ENV_VAR] at compile time, as a string. + +E.g., assuming we set [MY_VAR="foo"], it will turn: + +{@ocaml[ +let () = print_string [%get_env "MY_VAR"] +]} + +into: + +{@ocaml[ +let () = print_string "foo" +]} + + +Note that this is just a toy example, and we actually advise against this +type of PPX that has side effects or relies heavily on the file system or [env] +variables, unless you absolutely know what you're doing. 
+ +In this case, it won't work well with Dune, since Dune won't know +about the dependency on the env variables specified in the extension's payload. + +The entire code is: + +{@ocaml[ +open Ppxlib + +let expand ~ctxt env_var = + let loc = Expansion_context.Extension.extension_point_loc ctxt in + match Sys.getenv env_var with + | value -> Ast_builder.Default.estring ~loc value + | exception Not_found -> + let ext = + Location.error_extensionf ~loc "The environment variable %s is unbound" + env_var + in + Ast_builder.Default.pexp_extension ~loc ext + +let my_extension = + Extension.V3.declare "get_env" Extension.Context.expression + Ast_pattern.(single_expr_payload (estring __)) + expand + +let rule = Ppxlib.Context_free.Rule.extension my_extension +let () = Driver.register_transformation ~rules:[ rule ] "get_env" +]} + +{%html:
%}{{!"good-practices"}< Good practices}{%html:
%}{%html:
%} diff --git a/doc/generating-code.mld b/doc/generating-code.mld new file mode 100644 index 00000000..7a634a5d --- /dev/null +++ b/doc/generating-code.mld @@ -0,0 +1,360 @@ +{%html:
%}{{!"writing-ppxs"}< Writing PPXs}{%html:
%}{{!"matching-code"}Destructing AST nodes >}{%html:
%} + +{0 Generating AST Nodes} + +The rewriter's core is a function that outputs code in the form of an AST. +However, there are some issues with generating AST values when using the +constructors directly: + +- The type is {{!Ppxlib.Parsetree}pretty verbose}, with many fields + rarely used. +- The AST type might change at a version bump. In this case, the types used in +the PPX would become incompatible with the types of the new OCaml version. + +The second point is important: since [ppxlib] {{!page-driver.compat_mult_ver}translates} the AST to +the newest OCaml AST available before rewriting, your PPX would not +only become incompatible with the new OCaml version, but also with all [ppxlib] +versions released after the new AST type is introduced. + +For this reason, [ppxlib] provides abstractions over the OCaml AST, with a focus +on usability and stability. + +{1 The Different Options} + +The two main options are: + +- {{!Ppxlib.Ast_builder}[Ast_builder]}, +- {!Ppxlib_metaquot}. + +{{!Ppxlib.Ast_builder}[Ast_builder]} provides an API to generate AST nodes for the latest OCaml +version in a backward-compatible way. {!Ppxlib_metaquot} is different: it is a PPX that +lets you generate OCaml AST nodes by writing OCaml code, using quotations and +anti-quotations. + +Using {!Ppxlib_metaquot} requires less knowledge of the OCaml AST than +{{!Ppxlib.Ast_builder}[Ast_builder]} as it only uses natural OCaml syntax; +however, it's more restrictive than [Ast_builder] for two reasons: first, it's less flexible, since on its own it lacks the ability to generate nodes dynamically from other kinds of data: e.g. it's not possible to build an expression containing a string, given the string as input. Second, it's less general because it only allows users to generate a few different kinds of nodes such as +structure items, expressions, patterns, etc., but it is not possible to generate a +value of type {{!Ppxlib.Parsetree.row_field_desc}[row_field_desc]}! 
A typical workflow is to use [metaquot] for the constant skeleton of the node, and to use the [metaquot] anti-quotation workflow (see below) together with [Ast_builder] to fill in the dynamic parts. + +Note: [Ppxlib] also re-exports the OCaml compiler API [Ast_helper] to +manipulate the AST. This module is in [ppxlib] for historical compatibility reasons and might get deprecated at some point; it is recommended to use {{!Ppxlib.Ast_builder}[Ast_builder]} instead. + +{1:ast_builder The [Ast_builder] Module} + +{2 General Presentation} + +The {{!Ppxlib.Ast_builder}[Ast_builder]} module provides several kinds of functions to generate AST +nodes. The first kind are ones whose names closely match their {{!Ppxlib.Parsetree}[Parsetree]} +equivalents, but there are also "higher level" wrappers around those basic +blocks for common patterns such as creating an integer or string constant. + +{3 Low-Level Builders} + +The function names match the {{!Ppxlib.Parsetree}[Parsetree]} names closely, which makes it easy to +build AST fragments by just knowing the {{!Ppxlib.Parsetree}[Parsetree]}. + +For types wrapped in a record's [_desc] field, helpers are +generated for each constructor that generates the record wrapper, e.g., +for the type {{!Ppxlib.Parsetree.expression}[Parsetree.expression]}: + +{[ + type expression = + { pexp_desc : expression_desc + ; pexp_loc : Location.t + ; pexp_attributes : attributes + } + and expression_desc = + | Pexp_ident of Longident.t loc + | Pexp_constant of constant + | Pexp_let of rec_flag * value_binding list * expression + ... +]} + +The following helpers are created: + +{[ + val pexp_ident : loc:Location.t -> Longident.t loc -> expression + val pexp_constant : loc:Location.t -> constant -> expression + val pexp_let : loc:Location.t -> rec_flag -> value_binding list -> expression -> expression + ... 
+]} + +For other record types, such as [type_declaration], we have the following +helper: + +{[ + type type_declaration = + { ptype_name : string Located.t + ; ptype_params : (core_type * variance) list + ; ptype_cstrs : (core_type * core_type * Location.t) list + ; ptype_kind : type_kind + ; ptype_private : private_flag + ; ptype_manifest : core_type option + ; ptype_attributes : attributes + ; ptype_loc : Location.t + } + + val type_declaration + : loc : Location.t + -> name : string Located.t + -> params : (core_type * variance) list + -> cstrs : (core_type * core_type * Location.t) list + -> kind : type_kind + -> private : private_flag + -> manifest : core_type option + -> type_declaration +]} + +Attributes are always set to the empty list. If you want to set them, you +have to override the field with the [{ e with pexp_attributes = ... }] +notation. + +{3 High-Level Builders} + +Those functions are just wrappers on the low-level functions for simplifying the +most common use. For instance, to simply create a [1] integer constant with the +low-level building block, it would look like: + +{[ + Ast_builder.Default.pexp_constant ~loc (Parsetree.Pconst_integer ("1", None)) +]} + +This seems a lot for such a simple node. So, in addition to the low-level +building blocks, {{!Ppxlib.Ast_builder}[Ast_builder]} provides higher level-building blocks, such as +{{!Ppxlib.Ast_builder.Default.eint}[Ast_builder.Default.eint]}, to create integer constants: + +{[ + Ast_builder.Default.eint ~loc 1 +]} + +Those functions also follow a pattern in their name to make them easier to use. +Functions that generate an expression start with an [e], followed by what they +build, such as [eint], [echar], [estring], [eapply], [elist], etc. Similarly, names +that start with a [p] define a pattern, such as [pstring], [pconstruct], +[punit], etc. 
+ +{2 Dealing With Locations} + +As explained in the {{!page-"good-practices"."resp_loc"}dedicated section}, it is crucial +to correctly deal with locations. For this, {{!Ppxlib.Ast_builder}[Ast_builder]} can be used in +several ways, depending on the context: + +{{!Ppxlib.Ast_builder.Default}[Ast_builder.Default]} contains functions which take the location as a named +argument. This is the strongly recommended workflow and lets you control locations in a fine-grained way. + +If you have a concrete reason to specify the location once and for all, and always use this +specific one later in AST constructions, you can use the {{!Ppxlib.Ast_builder.Make}[Ast_builder.Make]} functor +or the {{!Ppxlib.Ast_builder.make}[Ast_builder.make]} function (outputting a first-class module). Notice that this is quite a rare use case. + +{2 Compatibility} + +In order to stay as compatible as possible when a new option appears in the AST, +{{!Ppxlib.Ast_builder}[Ast_builder]} always integrates the new option in a retro-compatible way (this is the case since the AST bump from 4.13 to 4.14). So, the +signature of each function won't change, and {{!Ppxlib.Ast_builder}[Ast_builder]} will choose a +retro-compatible way of generating an updated type’s AST node. + +However, sometimes you might want to use a feature that was introduced recently +in OCaml and is not integrated in {{!Ppxlib.Ast_builder}[Ast_builder]}. For instance, OCaml +4.14 introduced the possibility to explicitly introduce type variables in a +constructor declaration. This modified the AST type, and for +backwards compatibility, {{!Ppxlib.Ast_builder}[Ast_builder]} did not modify the signature of the +function. It is thus impossible to generate code using this new feature via the [Ast_builder] module directly. + +In the case you need to access a new feature, you can use the [Latest] submodule +(e.g., {{!Ppxlib.Ast_builder.Default.Latest}[Ast_builder.Default.Latest]} when specifying the locations). 
This module includes new functions, letting you +control all features introduced, at the cost of potentially breaking +changes when a new feature modifies the function in use. + +If a feature that was introduced in some recent version of OCaml is essential +for your PPX to work, it might imply that you need to restrict the OCaml version +on your opam dependencies. +{{!page-driver.compat_mult_ver}Remember} that +[ppxlib] will rewrite using the latest [Parsetree] version, {e but} it will then migrate the +[Parsetree] back to the OCaml version of the switch, possibly losing the information +given by the new feature. + +{1:metaquot [Metaquot] Metaprogramming} + +{2 General Presentation} + +As you have seen, defining code with {{!Ppxlib.Ast_builder}[Ast_builder]} does +not feel perfectly natural. Some knowledge of the [Parsetree] types is needed. +Yet, every part of a program we write corresponds to a specific AST node, so +there is no need for AST generation to be more difficult than that. + +[Metaquot] is a very useful PPX that allows users to define values of a [Parsetree] +type by writing natural code, using the quotations and antiquotations mechanism of +metaprogramming. + +Simplifying a bit, {{!Ppxlib_metaquot}[Metaquot]} rewrites an expression extension point directly +with its payload. Since the payload was parsed by the OCaml parser to a +[Parsetree] type's value, this rewriting turns naturally written code into AST values. + +{2 Usage} + +First, in order to use [Metaquot], add it in your [preprocess] Dune stanza: + +{[ + (preprocess (pps ppxlib.metaquot)) +]} + +Using Metaquot to generate code is simple: any [Metaquot] extension node in an +expression context will be rewritten into the [Parsetree] value that lies in its payload. +Notice that you'll need the [Ppxlib] opened, and a [loc] value of type +{{!Ppxlib.Location.t}Location.t} in scope when using metaquot. That location +will be attached to the [Parsetree] nodes your metaquot invokation produces. 
+Getting the location right is extremely important for error messages. + +However, the {{!Ppxlib.Parsetree.payload}[Parsetree.payload]} of an extension node can only take a few forms: a +{{!Ppxlib.Parsetree.structure}[structure]}, a {{!Ppxlib.Parsetree.signature}[signature]}, a {{!Ppxlib.Parsetree.core_type}[core type]}, or a {{!Ppxlib.Parsetree.pattern}[pattern]}. We might want to generate +other kinds of nodes, such as {{!Ppxlib.Parsetree.expression}[expressions]} or {{!Ppxlib.Parsetree.structure_item}[structure items]}, for instance. +{!Ppxlib_metaquot} provides different extension nodes for this: + +- The [expr] extension node to generate {{!Ppxlib.Parsetree.expression}[expressions]}: +{[let e = [%expr 1 + 1]]} +- The [pat] extension node to generate {{!Ppxlib.Parsetree.pattern}[patterns]}: +{[let p = [%pat? ("", _)]]} +- The [type] extension node to generate {{!Ppxlib.Parsetree.core_type}[core types]}: + {[let t = [%type: int -> string]]} +- The [stri] extension node to generate {{!Ppxlib.Parsetree.structure_item}[structure_item]}, with its [sigi] counterpart for {{!Ppxlib.Parsetree.signature_item}[signature_item]}: + {[ + let stri = [%stri let a = 1] + let sigi = [%sigi: val i : int] + ]} +- The [str] and [sig] extension nodes to respectively generate + {{!Ppxlib.Parsetree.structure}[structure]} + and {{!Ppxlib.Parsetree.signature}[signature]}. + {[ +let str = + [%str + let x = 5 + let y = 6.3] + +let sig_ = + [%sig: + val x : int + val y : float] +]} + +Note that the replacement only takes place when the extension node is an "expression" +extension node: indeed, the [payload] is a {e value} (of [Parsetree] type) that would not fit +elsewhere in the AST. So, [let x : [%str "incoherent"]] would not be rewritten by [metaquot]. +(Actually, it also rewrites "pattern" extension nodes, as you'll see in the +chapter on {{!page-"matching-code".metaquot}matching AST nodes}.) + +Also note the [:] and [?] 
in the [sigi], [type], and [pat] cases: they are needed for +the payload to be parsed as the right kind of node. + +Consider now the extension node [[%expr 1 + 1]] in an expression context. +[Metaquot] will actually expand it into the following code: + +{[ + { + pexp_desc = + (Pexp_apply + ({ + pexp_desc = (Pexp_ident { txt = (Lident "+"); loc }); + pexp_loc = loc; + pexp_attributes = [] + }, + [(Nolabel, + { + pexp_desc = (Pexp_constant (Pconst_integer ("1", None))); + pexp_loc = loc; + pexp_attributes = [] + }); + (Nolabel, + { + pexp_desc = (Pexp_constant (Pconst_integer ("1", None))); + pexp_loc = loc; + pexp_attributes = [] + })])); + pexp_loc = loc; + pexp_attributes = [] + } +]} + +Looking at the example, you might notice two things: + +- The AST types are used without a full path to the module. +- There is a free variable named [loc] and of type [Location.t] in the code. + +So for this to compile, you need both to open [ppxlib] and to have a [loc : Location.t] +variable in scope. +The produced AST node value, and every other node within it, will be located in +this [loc]. You should therefore make sure that [loc] is the location you want for +your generated code when using [metaquot]. + +{2:antiquotations Anti-Quotations} + +Using these extensions alone, you can only produce constant/static AST nodes. +[metaquot] has a solution for that: anti-quotation. You can use anti-quotation +to insert any expression representing an AST node. That way, you can include +dynamically generated nodes inside a [metaquot] expression extension point. 
+ +Consider the following example: + +{[ + let with_suffix_expr ~loc s = + let dynamic_node = Ast_builder.Default.estring ~loc s in + [%expr [%e dynamic_node] ^ "some_fixed_suffix"] +]} + +The [with_suffix_expr] function will create an [expression] which represents the +concatenation of the [s] argument and the fixed suffix, i.e., +[with_suffix_expr ~loc "some_dynamic_stem"] is equivalent to +[[%expr "some_dynamic_stem" ^ "some_fixed_suffix"]]. + +The syntax for anti-quotation depends on the type of the node you wish to insert +(which must also correspond to the context of the anti-quotation extension node): + +- [e] is the extension point used to anti-quote values of type + {{!Ppxlib.Parsetree.expression}[expression]}: + {[let f some_expr_node = [%expr 1 + [%e some_expr_node]]]} +- [p] is the extension point used to anti-quote values of type + {{!Ppxlib.Parsetree.pattern}[pattern]}: + {[let f some_pat_node = [%pat? (1, [%p some_pat_node])]]} +- [t] is the extension point used to anti-quote values of type + {{!Ppxlib.Parsetree.core_type}[core_type]}: + {[let f some_core_type_node = [%type: int -> [%t some_core_type_node]]]} +- [m] is the extension point used to anti-quote values of type + {{!Ppxlib.Parsetree.module_expr}[module_expr]} + or {{!Ppxlib.Parsetree.module_type}[module_type]}: + {[ + let f some_module_expr_node = [%expr let module M = [%m some_module_expr_node] in M.x] + let f some_module_type_node = [%sigi: module M : [%m some_module_type_node]] + ]} +- [i] is the extension point used to anti-quote values of type + {{!Ppxlib.Parsetree.structure_item}[structure_item]} or + {{!Ppxlib.Parsetree.signature_item}[signature_item]}. 
Note that the syntax for structure/signature item extension nodes uses two [%%]: + {[ +let f some_structure_item_node = + [%str + let a = 1 + + [%%i some_structure_item_node]] + +let f some_signature_item_node = + [%sig: + val a : int + + [%%i some_signature_item_node]] +]} + +If an anti-quote extension node is in the wrong context, it won't be +rewritten by {{!Ppxlib_metaquot}[Metaquot]}. For instance, in [[%expr match [] with [%e some_value] -> 1]] +the anti-quote extension node for expressions is put in a pattern context, +and it won't be rewritten. + +On the contrary, you should use anti-quotes whose kind ([[%e ...]], [[%p ...]]) +match the context. For example, you should write: + +{@ocaml[ + let let_generator pat type_ expr = + [%stri let [%p pat] : [%t type_] = [%e expr]] ;; +]} + +Finally, remember that we are inserting values, so we never use patterns in the payloads of anti-quotations. Those will be used for {{!page-"matching-code".antiquotations}matching}. + +{%html:
%}{{!"writing-ppxs"}< Writing PPXs}{%html:
%}{{!"matching-code"}Destructing AST nodes >}{%html:
%} diff --git a/doc/good-practices.mld b/doc/good-practices.mld new file mode 100644 index 00000000..dd61cd86 --- /dev/null +++ b/doc/good-practices.mld @@ -0,0 +1,405 @@ +{%html:
%}{{!"ast-traversal"}< Traversing the AST}{%html:
%}{{!"examples"}Examples >}{%html:
%} + +{0 Good Practices} + +{1:resp_loc Respecting Locations} + +Correctly dealing with locations is essential to correctly generate OCaml code. +They are necessary for error reporting by the compiler, but more generally for +Merlin's features to work, such as displaying occurrences and jumping to +definition. When the driver is called with the [-check] and +[-check-locations] flags, [ppxlib] makes it a requirement that locations follow +some rules in order to accept the rewriting, as it will check that some +invariants are respected. + +{2 The Invariants} + +The invariants are as follows: + +- AST nodes are required to be well-nested with respect to locations +- the locations of "sibling" AST nodes should not overlap + +This is required for Merlin to behave properly. + +Indeed, for almost any query directed at Merlin, it will need to inspect the +context around the user's cursor to give an answer that makes sense. And the +only input it has to do that is the cursor’s position in the buffer. +The handling of most queries starts by traversing the AST, using the +locations of nodes to select the right branch. The first invariant is necessary to avoid +discarding subtrees too early, the second is used to avoid Merlin making arbitrary +choices (if you ask for the type under the cursor, and there seem to be two +things under the cursor, Merlin will need to pick one). + +{2 Guidelines for Writing Well-Behaved PPXs} + +It's obviously not always (indeed rarely) possible to mint new locations +when manipulating the AST. + +The intended way to deal with locations is this: + +- AST nodes that exist in the source should keep their original location +- new nodes should be given a "ghost" location (i.e., + [{ some_loc with loc_ghost = true }]) to indicate that the node doesn't + exist in the sources. 
+ +In particular, {{!Ppxlib.Location.none}[Location.none]} is never meant to be +used by PPX authors, where some location is always available (for instance, +derivers and extenders at least know the locations of their relevant node). + +Both the new check and Merlin will happily traverse the ghost nodes as if they +didn't exist. Note: this comes into play when deciding which nodes are +"siblings," for instance, if your AST is: + +{v + A (B1(C, D), + B2(X, Y)) +v} + +but [B2] has a ghost location, then [B1], [X] and [Y] are considered +siblings. + +Additionally, there is an attribute [\[@merlin.hide\]] that you can add on +nodes to tell Merlin (and the check) to ignore this node and all of its +children. Some helpers for this are provided in {{!Ppxlib.Merlin_helpers}[Merlin_helpers]}. + +{1:handling_errors Handling Errors} + +In order to give a nice user experience when reporting errors or failures in a +PPX, it is necessary to include as much generated content as possible. +Most IDE tools, such as Merlin, rely on the AST for their features, such as +displaying type, jumping to definition, or showing the list of errors. + +{2 Embedding the Errors in the AST} + +A common way to report an error is to throw an exception. However, this method +interrupts the execution flow of the [ppxlib] driver and leaves later PPXs +unexpanded when handing the AST over to Merlin. + +Instead, it is better to always return a valid AST, as complete as possible, but +with "error extension nodes" at every place where successful code generation was +impossible. Error extension nodes are special extension nodes [[%ocaml.error +error_message]] that can be embedded into a valid AST and are interpreted +later as errors, e.g., by the compiler or Merlin. As all extension nodes, +they can be put {{:https://ocaml.org/manual/extensionnodes.html}at many places +in the AST} to replace structure items, expressions, or patterns, for example. 
+ +So whenever you're in doubt whether to throw an exception or to embed the error as +an error extension node when writing a PPX rewriter, +embedding the error is the way to go! And whenever you're in doubt about where +exactly to embed the error inside the AST, a good rule of thumb is: as deep in +the AST as possible. + +For instance, suppose a rewriter is supposed to define a new record type, but +there is an error in one field’s type generation. In order to have +the most complete AST as output, the rewriter can still define the type and all +of its fields, putting an extension node in place of the type of the faulty +field: + +{[ + type long_record = { + field_1: int; + field_2: [%ocaml.error "field_2 could not be implemented due to foo"]; + } +]} + +[ppxlib] provides a function in its API to create error extension nodes: +{{!Ppxlib.Location.error_extensionf}[error_extensionf]}. This function creates +an extension node, which then must be transformed into the right kind of node +using functions such as +{{!Ppxlib.Ast_builder.Default.pexp_extension}[pexp_extension]}. + +{2 A Documented Example} + +Let us give an example. We will define a deriver on record types, which +constructs a default value from a given type. For instance, the derivation on +the type [type t = { x:int; y: float; z: string}] would yield [let default_t = +{x= 0; y= 0.; z= ""}]. This deriver has two limitations: + +{ol +{- It does not work on other types than records,} +{- It only works for records containing fields of type [string], [int], or [float].} +} + +The rewriter should warn the user about these limitations with good error +reporting. Let’s first look at the second point. Here is the function mapping +the fields from the type definition to a default expression. 
+ +{[ + let create_record ~loc fields = + let declaration_to_instantiation (ld : label_declaration) = + let loc = ld.pld_loc in + let { pld_type; pld_name; _ } = ld in + let e = + match pld_type with + | { ptyp_desc = Ptyp_constr ({ txt = Lident "string"; _ }, []); _ } -> + pexp_constant ~loc (Pconst_string ("", loc, None)) + | { ptyp_desc = Ptyp_constr ({ txt = Lident "int"; _ }, []); _ } -> + pexp_constant ~loc (Pconst_integer ("0", None)) + | { ptyp_desc = Ptyp_constr ({ txt = Lident "float"; _ }, []); _ } -> + pexp_constant ~loc (Pconst_float ("0.", None)) + | _ -> + pexp_extension ~loc + @@ Location.error_extensionf ~loc + "Default value can only be derived for int, float, and string." + in + ({ txt = Lident pld_name.txt; loc }, e) + in + let l = List.map fields ~f:declaration_to_instantiation in + pexp_record ~loc l None +]} + + +When the record definition contains several fields with types other than [int], +[float], or [string], several error nodes are added in the AST. Moreover, the +location of the error nodes corresponds to the field record's definition. +This allows tools such as Merlin to report all errors at once, at the right +location, resulting in a better workflow than having to recompile every time an +error is corrected to see the next one. + +The first limitation is that the deriver cannot work on non-record types. +However, we decided here to derive a default value, even in the case of +non-record types, so that it does not appear as undefined in the remaining of +the file. This impossible value consists of an error extension node. 
+ +{[ + let generate_impl ~ctxt (_rec_flag, type_declarations) = + let loc = Expansion_context.Deriver.derived_item_loc ctxt in + List.map type_declarations ~f:(fun (td : type_declaration) -> + let e, name = + match td with + | { ptype_kind = Ptype_record fields; ptype_name; ptype_loc; _ } -> + (create_record ~loc:ptype_loc fields, ptype_name) + | { ptype_name; ptype_loc; _ } -> + ( pexp_extension ~loc + @@ Location.error_extensionf ~loc:ptype_loc + "Cannot derive accessors for non record type %s" + ptype_name.txt, + ptype_name ) + in + [ + pstr_value ~loc Nonrecursive + [ + { + pvb_pat = ppat_var ~loc { txt = "default_" ^ name.txt; loc }; + pvb_expr = e; + pvb_attributes = []; + pvb_loc = loc; + }; + ]; + ]) + |> List.concat +]} + +{2 In Case of Panic} + +In some rare cases, it might happen that a whole file rewriter is not able to +output a meaningful AST. In this case, they might be tempted to raise a located +error: an exception that includes the error's location. Moreover, this has +historically been what was suggested to do by [ppxlib] examples, but it is now +discouraged in most of the cases, as it prevents Merlin features to work well. + +If such an exception isn't caught, the PPX driver will return an error code, +and the exception will be pretty-printed, including the location (that's the +case when Dune calls the driver). When the driver is spawned with the +[-embed-errors] or [-as-ppx] flags (that's the case when Merlin calls the driver), +the driver will look for located error. If it catches one, it will stop +its rewriting chain at this point and output an AST consisting of the +located error followed by the last valid AST: the one passed to the raising +rewriter. + +Even more in context-free rewriters, raising should be avoided in favour of +outputting a single error node when finer grained reporting is not needed or +possible. 
As the whole context-free rewriting is done in one traverse of the +AST, a single raise will cancel both the context-free pass and upcoming +rewriters, and the AST prior to the context-free pass will be outputted together +with the error. + +The function provided by the API to raise located errors is +{{!Ppxlib.Location.raise_errorf}[raise_errorf]}. + +{2 Migrating From Raising to Embedding Errors} + +Lots of PPXs exclusively use {{!Ppxlib.Location.raise_errorf}[raise_errorf]} +to report errors, instead of the more Merlin-friendly way of +embedding errors in the AST, as described in this section. + +If you want to migrate such a codebase to the embedding approach, the rest of this section will present few +recipes to do that. It might not be completely trivial, as raising can +be done anywhere in the code, including in places where "embedding" would not +make sense. The first thing you can do is to turn your internal raising functions to +function returning a [result] type. + +The workflow for this change would look like this: + +{ol +{- Search your code for all uses of {{!Ppxlib.Location.raise_errorf}[raise_errorf]}, using [grep], for instance.} +{- For each of them, turn them into functions returning a [(_, extension) result] type, using {{!Ppxlib.Location.error_extensionf}[error_extensionf]} to generate the [Error].} +{- Let the compiler or Merlin tell you where to propagate the [result] type (most certainly using [map]s and [bind]s).} +{- When you have propagated until a point where you can embed an extension node, turn the [Error] case into an extension node and embed it.} +} + +This is quite convenient, as it allows you to do a "type-driven" modification, +using the full static analysis of OCaml to never omit a special case and to +confidently find the place the most deeply in the AST to embed the error. +However, it might induce quite a lot of code modification, and exceptions are +sometimes convenient to use depending on your preference. 
In case you want to do only +a very simple change and keep using exceptions, just catch them at the right place and turn them into extension +points embedded in the AST, as in the following example: + +{[ +let rewrite_extension_point loc payload = + try generate_ast payload + with exn -> + let get_error exn = + match Location.Error.of_exn exn with + | None -> raise exn + | Some error -> error + in + let extension = exn |> get_error |> Location.Error.to_extension in + Ast_builder.Default.pstr_extension ~loc extension [] +]} + +{1:quoting Quoting} + +Quoting is part of producing +{{:https://en.wikipedia.org/wiki/Hygienic_macro}hygienic} code. But before +talking about the solution, let's introduce the problem. + +Say you are writing an extension rewriter, which takes an expression as payload, and would replace all identifiers [id] in the expression with a similar expression, but with debug printing: + +{[ + let x = 0 in + let y = 2 in + [%debug x + 1, y + 2 ] +]} + +would generate the following code: + +{[ + let x = 0 in + let y = 2 in + let debug = Printf.printf "%s = %d; " in + (debug "x" x ; x) + 1, + (debug "y" y ; y) + 2 +]} + + +When executed, the code would print [x = 0; y = 2; ]. So far, so good. However, suppose now that instead of [x], the variable is named [debug]. The following seemingly equivalent code: + +{[ + let debug = 0 in + let y = 2 in + [%debug debug + 1, y + 2 ] +]} + +would generate: + +{[ + let debug = 0 in + let y = 2 in + let debug = Printf.printf "%s = %d; " in + (debug "debug" debug ; debug) + 1, + (debug "y" y ; y) + 2 +]} + +which does not even type-check! The problem is that the payload is expected to +be evaluated in some environment where [debug] has some value and type, but the +rewriting modifies this environment and shadows the [debug] name. + + + +"Quoting" is a mechanism to prevent this problem from happening. 
In [ppxlib], it +is done through the {{!Ppxlib.Expansion_helpers.Quoter}[Expansion_helpers.Quoter]} module in several steps: + +- First, create a quoter using the {{!Ppxlib.Expansion_helpers.Quoter.create}[create]} function: + +{[ + # open Expansion_helpers ;; + # let quoter = Quoter.create () ;; + val quoter : Quoter.t = <abstr> +]} + +- Then, use {{!Ppxlib.Expansion_helpers.Quoter.quote}[Expansion_helpers.Quoter.quote]} to quote all the expressions that are given by the user, might rely on a context, and that you want "intact." + +{[ + # let quoted_part = Quoter.quote quoter part_to_quote ;; + val quoted_part : expression = + ... +]} + +- Finally, call {{!Ppxlib.Expansion_helpers.Quoter.sanitize}[Expansion_helpers.Quoter.sanitize]} on the whole expression (with quoted parts). + +{[ + # let result = Expansion_helpers.Quoter.sanitize quoter rewritten_expression ;; + val result : expression = + ... +]} + +If the [debug] rewriter had been written using this method, the quoting would +have ensured that the payload is evaluated in the same context as the +extension node! + +Here is an example of how to write a [debug] rewriter (with the limitation that the payload should not contain variable bindings, but the code was left simple to illustrate quoting): + +{[ +# let rewrite expr = + (* Create a quoter *) + let quoter = Quoter.create () in + (* An AST mapper to log and replace variables with quoted ones *) + let replace_var = + object + (* See the chapter on AST traverse *) + inherit Ast_traverse.map as super + + (* in case of expression *) + method! 
expression expr = + match expr.pexp_desc with + (* in case of identifier (not "+") *) + | Pexp_ident { txt = Lident var_name; loc } + when not (String.equal "+" var_name) -> + (* quote the var *) + let quoted_var = Quoter.quote quoter expr in + let name = Ast_builder.Default.estring ~loc var_name in + (* and rewrite the expression *) + [%expr + debug [%e name] [%e quoted_var]; + [%e quoted_var]] + (* otherwise, continue inside recursively *) + | _ -> super#expression expr + end + in + let quoted_rewrite = replace_var#expression expr in + let loc = expr.pexp_loc in + (* Sanitize the whole thing *) + Quoter.sanitize quoter + [%expr + let debug = Printf.printf "%s = %d; " in + [%e quoted_rewrite]] ;; + val rewrite : expression -> expression = +]} + +With {!Ppxlib}'s current quoting mechanism, the code given in that example would look like: + +{[ + # Format.printf "%a\n" Pprintast.expression @@ rewrite [%expr debug + 1, y + 2] ;; + let rec __1 = y + and __0 = debug in + let debug = Printf.printf "%s = %d; " in + (((debug "debug" __0; __0) + 1), ((debug "y" __1; __1) + 2)) + - : unit = () +]} + +{1 Testing Your PPX} + +This section is not yet written. You can refer to {{:https://tarides.com/blog/2019-05-09-an-introduction-to-ocaml-ppx-ecosystem#testing-your-ppx}this blog post} (notice that that blog post was written before `dune` introduced its cram test feature), or contribute to the [ppxlib] documentation by opening a pull request in the {{:https://github.com/ocaml-ppx/ppxlib/}repository}. + +{1 Migrate From Other Preprocessing Systems} + +This section is not yet written. You can contribute to the [ppxlib] documentation by opening a pull request in the {{:https://github.com/ocaml-ppx/ppxlib/}repository}. + +{1 Other good practices} + +There are many good practices or other way to use [ppxlib] that are not mentioned in this manual. For instance, (in very short), you should always try to fully qualify variable names that are generated into the code via a PPX. 
+ +If you want to add a section to this "good practices" manual, you can contribute to the [ppxlib] documentation by opening a pull request in the {{:https://github.com/ocaml-ppx/ppxlib/}repository}. + +{%html:
%}{{!"ast-traversal"}< Traversing the AST}{%html:
%}{{!"examples"}Examples >}{%html:
%} diff --git a/doc/index.mld b/doc/index.mld index 8dd3488b..0ae7b67b 100644 --- a/doc/index.mld +++ b/doc/index.mld @@ -1,28 +1,28 @@ -{0 Ppxlib's user manual} +{0 [ppxlib]'s user manual} {1 Overview} -This is the user manual and api for ppxlib, the core of the ppx meta-programming -system for {{:https://ocaml.org/}OCaml} and its derivatives such as -{{:https://reasonml.github.io/}Reason}. This manual is aimed at both users and -authors of ppx rewriters and contains everything one should know in order to use -or write ppx rewriters. - -It is assumed in this manual that the user is familiar with the -{{:https://dune.build/}Dune} build system. In particular, all the examples in -this manual referring to the build system will present -{{:https://dune.build/}Dune} configurations files and commands. It is possible -to use ppxlib with other build systems, however this is not covered by this -manual. +This is the user manual and API for [ppxlib], the core of the PPX meta-programming +system for {{:https://ocaml.org/}OCaml} and its derivatives, such as +{{:https://reasonml.github.io/}Reason}. For a good introduction on PPXs, what +they are, and how to use them, see the +{{:https://ocaml.org/docs/metaprogramming}OCaml official guide} on PPXs. This +manual is mostly aimed at authors of PPX rewriters and contains everything one +should know in order to write PPX rewriters. {1 Manual} -The {{!page-manual}manual} consists of three main sections: - -- {{!page-manual."what-is-ppx"}What is ppx} -- {{!page-manual."ppxlib-for-end-users"}Ppxlib for end users} -- {{!page-manual."ppxlib-for-plugin-authors"}Ppxlib for plugin authors} - +The manual consists of several sections. 
It can be read linearly, but you can also jump directly to your section of interest: +{ol +{li {{!page-"quick_intro"}An introduction to [ppxlib]}} +{li {{!page-"driver"}How [ppxlib] works internally}} +{li {{!page-"writing-ppxs"}Registering a transformation}} +{li {{!page-"generating-code"}Generating AST nodes}} +{li {{!page-"matching-code"}Destructing AST nodes}} +{li {{!page-"ast-traversal"}Traversing the AST}} +{li {{!page-"good-practices"}Good practices}} +{li {{!page-"examples"}Examples}} +} {1 API} The API exposes the following modules: diff --git a/doc/manual.mld b/doc/manual.mld deleted file mode 100644 index 2b26f84f..00000000 --- a/doc/manual.mld +++ /dev/null @@ -1,723 +0,0 @@ -{0 Ppxlib's manual} - -{1:what-is-ppx What is ppx} - -{2:ppx-overview Overview} - -Ppx is a meta-programming system for the OCaml programming language. It allows -developers to generate code at compile time in a principled way. The -distinguishing feature of ppx is that it is tightly integrated with the OCaml -parser and instead of operating at the text level it operates on the internal -structured representation of the language in the compiler, called the Abstract -Syntax Tree or AST for short. - -A few years ago, the OCaml language was extended with two new constructions: -annotations and extension points. Annotations are arbitrary pieces of -information that can be attached to most parts of the OCaml language. They can -be used to control the behavior of the OCaml compiler, or in some specific cases -to generate code at compile time. - -Extension points are compile time functions. The compiler itself doesn't know -how to interpret them and they must all be rewritten by the ppx system before -the compiler can process input files further. - -Ppxlib mainly supports two ways of generating code at compile time: by expanding -an extension point or by expanding a [[@@deriving ...]] attribute after a type -declaration. 
- -{2 How does it work?} - -The ppx system is composed of 3 parts: - -- individual ppx rewriters -- ppxlib -- a hook in the compiler - -Individual ppx rewriters are those implemented by various developers to provide -features to end users, such as -{{:https://github.com/janestreet/ppx_expect}ppx_expect} which provides a good -inline testing framework. - -All these rewriters are written against the ppxlib API. Ppxlib is responsible -for acknowledging the various rewriters an end user wants to use, making sure -they can be composed together and performing the actual rewriting of input -files. - -The hook in the compiler allows ppxlib to insert itself in the compilation -pipeline and perform the rewriting of input files based on a list of ppx -rewriters specified by the user. The hooks take the form of command line flags -that take a command to execute. The compiler supports two slightly different -flags, for providing commands that are executed at different stages: [-pp] and -[-ppx]. The difference between the two is as follow: - -- [-pp] takes as argument a command that is used to parse the textual - representation. Such a command can produce either a plain OCaml source file or - a serialised representation of the AST - -- [-ppx] takes as argument a command that is given a serialised representation - of the AST and produces another serialised AST - -Ppxlib generally uses the first one as it yields faster compilation times, -however it supports both methods of operation. - -{2 Is ppxlib necessary?} - -Yes. While authors of ppx rewriters may in theory use the compiler hooks -directly, doing so is strongly discouraged for the following reasons: - -- composing such ppx rewriters is slow and yields much slower compilation times -- the ABI of the hook is not stable and regularly changes in incompatible ways. 
- This means that a ppx rewriter using the compiler hook directly is likely to - work only with a single version of the OCaml compiler -- the compiler does not provide good composition semantics, which means that - input files will not always be transformed as expected. It is hard to predict - what the final result will be, and for end users it is hard to understand what - is happening when things go wrong -- the compiler doesn't handle hygiene: if an attribute is mistyped or misplaced, - it is silently ignored by the compiler. If two ppx rewriters want to interpret - the same attribute or extension point in incompatible ways, the result is not - specified - -In summary, ppxlib abstracts away the low-level details of the ppx -system and exposes a consistent model to authors of ppx rewriters and end users. - -{2 Current state of the ppx ecosystem} - -Ppxlib was developed after the introduction of the ppx system. As a result, many -ppx rewriters do not currently use ppxlib and are using the compiler hooks -directly. Ppxlib can acknowledge such rewriters so that they can be used in -conjunction with more modern rewriters, however it cannot provide a good -composition or hygiene story when using such ppx rewriters. - -{2 Note on stability regarding new compiler releases} - -Due to the nature of the ppx system, it is hard for ppxlib to provide full -protection against compiler changes. This means that a ppx rewriter written -against ppxlib today can be broken by a future release of the OCaml compiler and -a new release of the ppx rewriter will be necessary to support the new compiler. - -However the following is true: every time this might happen, it will be possible -to extend ppxlib to provide a greater protection, so that eventually the whole -ppx ecosystem is completely shielded from breaking compiler changes. - -{1:ppxlib-for-end-users PPX for end users} - -This section describes how to use ppx rewriters for end users. 
- -{2 Using a ppx rewriter in your project} - - -To use one or more ppx rewriters written by you or someone else, simply list -them in the [preprocess] field of your [dune] file. For instance: - -{[ - - (library - (name my_lib) - (preprocess (pps ppx_sexp_conv ppx_expect))) -]} - -Some ppx rewriters takes parameters in the form of command line flags. These can -be specified using the usual convention for command line flags: atoms starting -with [-] are treated as flags and [--] can be used to separate ppx rewriter -names from more command line flags. For instance: - -{[ - (library - (name my_lib) - (preprocess - (pps ppx_sexp_conv ppx_expect -inline-test-drop))) - - (library - (name my_lib) - (preprocess - (pps ppx_sexp_conv ppx_expect -- --cookie "x=42"))) -]} - -Once this is done, you can use whatever feature is offered by the ppx rewriter. - -{2 Looking at the generated code} - -At the time of writing this manual, there is no easy way to look at the fully -transformed input file in order to see exactly what will be compiled by OCaml. -You can however use the following method, which is not great but works: run -[ocamlc -dsource _build/default/]. For -instance to see the transformed version of [src/foo.ml], run: - -{[ - $ ocamlc -dsource _build/default/src/foo.pp.ml -]} - -{2 [@@deriving_inline]} - -Ppxlib supports attaching the [[@@deriving]] attribute to type declaration. This -is used to generate code at compile time based on the structure of the type. For -this particular case, ppxlib supports an alternative way to look at the -generated code: replace [[@@deriving ]] by [[@@deriving_inline -][@@@end]]. Then run the following command: - -{[ - $ dune build --auto-promote -]} - -If you reload the file in your editor, you should now see the contents of the -generated code between the [[@@deriving_inline]] and [[@@@end]] attribute. This -can help understanding what is provided by a ppx rewriter or debug compilation -errors. 
- -{2 Dropping ppx dependencies with [@@deriving_inline]} - -You might notice that the resulting file when using [[@@deriving_inline]] needs -no special treatment to be compiled. In particular, you can build it without the -ppx rewriter or even ppxlib. You only need them while developing the project, in -order to automatically produce the generated code but that's it. End users of -your project do not need to install ppxlib and other ppx rewriters themselves. - -{{:https://dune.build/}Dune} gracefully supports this workflow: simply replace -[preprocess] in your [dune] file by [lint]. For instance: - -{[ - (library - (name my_lib) - (lint (pps ppx_sexp_conv))) -]} - -Then to regenerate the parts between [[@@deriving_inline]] and [[@@@end]], run -the following command: - -{[ - $ dune build @lint --auto-promote -]} - -{1:ppxlib-for-plugin-authors PPX for plugin authors} - -This section describes how to use [ppxlib] for PPX plugin authors. - -{2 Getting started} - -There are two main kinds of PPX plugins you can write with [ppxlib]: - -- Extension rewriters i.e. ppx plugins that rewrite extension points such as - [[%my_ext ...]] into valid OCaml code. -- Derivers i.e. ppx plugins that generate code from type, module or exception - declarations annotated with [[@@deriving my_deriver]]. - -It is also possible to write more advanced transformations such as rewriting -constants that bear the right suffix, rewriting function calls based on the -function identifier or to generate code from items annotated with a custom -attribute but we won't cover those in this section. - -[ppxlib] compiles those transformations into rules which allows it to apply them -to the right AST nodes, even recursively in nodes generated by other -transformations, in a single AST traversal. 
- -Note that you can also write arbitrary, whole AST transformations with ppxlib -but they don't have a clear composition semantic since they have to be applied -sequentially as opposed to the other, better defined rewriting rule. You should -always prefer the above mentioned transformations instead when possible. - -{3 The OCaml AST} - -As described in {!"ppx-overview"}, PPX rewriters don't operate at the text -level but instead used the compiler's internal representation of the source -code: the Abstract Syntax Tree or AST. - -A lot of the following sections of the manual assume a certain level of -familiarity with the OCaml AST so we'll try to cover the basics here and to give -you some pointers to deepen your knowledge on the subject. - -The types describing the AST are defined in the [Parsetree] module of OCaml's -compiler-libs. Note that they vary from one version of the compiler to another -so make sure you look at an up to date version and most importantly to the one -corresponding to what ppxlib's using internally. You can find the module's API -documentation online -{{:https://caml.inria.fr/pub/docs/manual-ocaml/compilerlibref/Parsetree.html}here}. -If you're new to these parts of OCaml it's not always easy to navigate as it -just contains the raw type declarations but no actual documentation. This -documentation is actually written in [parsetree.mli] but not in a way that -allows it to make its way to the online doc unfortunately. Until this is fixed -in the compiler you can look at the local copy in one of your opam switches: -[/lib/ocaml/compiler-libs/parsetree.mli]. Here you'll find -detailed explanations as to which part of the concrete syntax the various types -correspond to. - -Ppxlib includes a [Parsetree] module for every version of OCaml since [4.02]. -For instance, the version for [4.05] is in {!Astlib.Ast_405.Parsetree}. 
In what -comes next, we will link the values we describe to the {!Ppxlib.Parsetree} -module, which corresponds to one version of [Parsetree]. - -[Parsetree] is quite a large module and there are plenty of types there, a lot -of which you don't necessarily have to know when writing a rewriter. The two -main entry points are the [structure] and [signature] types which, amongst other -things, describe respectively the content of [.ml] and [.mli] files. Other types -you should be familiar with are: - -- {{!Ppxlib.Parsetree.expression}[expression]} which describes anything in OCaml - that evaluates to a value, the right hand side of a let binding or the - branches of an if-then-else for instance. -- {{!Ppxlib.Parsetree.pattern}[pattern]} which is what you use to deconstruct an - OCaml value, the left hand side of a let binding or a pattern-matching case - for example. -- {{!Ppxlib.Parsetree.core_type}[core_type]} which describes type - expressions ie what you use to explicitly constrain the type of an expression - or describe the type of a value in your [.mli] files. Usually it's what comes - after a [:]. -- {{!Ppxlib.Parsetree.structure_item}[structure_item]} and - {{!Ppxlib.Parsetree.signature_item}[signature_item]} which describe the top - level AST nodes you can find in a structure or signature such as type - definitions, value declarations or module declarations. - -Knowing what these types correspond to puts you in a good position to write a -PPX plugin as they are the parts of the AST you will deal with the most in -general. - -{3 Writing an extension rewriter} - -To write your ppx plugin you'll need to add the following stanza in your dune -file: - -{[ - (library - (public_name my_ppx_rewriter) - (kind ppx_rewriter) - (libraries ppxlib)) -]} - -You'll note that you have to let dune know this is not a regular library but a -ppx_rewriter using the [kind] field. 
The public name you chose here is the name -your users will refer to your ppx in there [preprocess] field. E.g. here to use -this ppx rewriter one would add the [(preprocess (pps my_ppx_rewriter))] to -their [library] or [executable] stanza. - -You will also need the following [my_ppx_rewriter.ml]: - -{[ - open Ppxlib - - let expand ~ctxt payload = - ... - - let my_extension = - Extension.V3.declare - "my_ext" - - - expand - - let rule = Ppxlib.Context_free.Rule.extension my_extension - - let () = - Driver.register_transformation - ~rules:[rule] - "my_ext" -]} - -There are a few things to explain here. The last part, i.e. the call to -{{!Ppxlib.Driver.register_transformation}[Driver.register_transformation]} is -common to almost all ppxlib-based PPX plugins and is how you let [ppxlib] know -about your transformation. You'll note that here we register a single rule but -it is possible to register several rules for a single logical transformation. - -The above is specific to extension rewriters. You need to declare a ppxlib -{{!Ppxlib.Extension}[Extension]}. The first argument is the extension name, -that's what will appear after the [%] in the extension point when using your -rewriter, e.g. here this will transform [[%my_ext ...]] nodes. The -[] argument describes where in OCaml code your this extension -can be used. You can find the full list in the API documentation in the -{{!Ppxlib.Extension.Context}[Extension.Context]} module. The [] -argument helps you restrict what users can put into the payload of your -extension, i.e. [[%my_ext ]]. We cover -{{!Ppxlib.Ast_pattern}[Ast_pattern]} in depths here but the simplest form it can -take is {{!Ppxlib.Ast_pattern.__}[Ast_pattern.__]} which allows any payload -allowed by the language and passes it to the expand function which is the last -argument here. The expand function is where the logic for your transformation is -implemented. 
It receives an -{{!Ppxlib.Expansion_context.Extension.t}[Expansion_context.Extension.t]} -argument labelled [ctxt] and other arguments whose type and number depends on -the [] argument. The return type of the function is determined by -the [] argument, e.g. in the following example: - -{[ - Extension.V3.declare "my_ext" Extension.Context.expression Ast_pattern.__ expand -]} - -The type of the [expand] function is: - -{[ - val expand : ctxt: Expansion_context.Extension.t -> payload -> expression -]} - -If you want to look at a concrete example of extension rewriter you can find one -in the [examples/] folder of the [ppxlib] repository -{{:https://github.com/ocaml-ppx/ppxlib/tree/main/examples/simple-extension-rewriter}here}. - -{3 Writing a deriver} - -Similarly to extension rewriters, derivers must be declared as such to dune. To -do so you can use the following stanza in your dune file: - -{[ - (library - (public_name my_ppx_deriver) - (kind ppx_deriver) - (libraries ppxlib)) -]} - -Same as above, the public name used here determines how users will refer to your -ppx deriver in their dune stanzas. - -You will also need the following [my_ppx_deriver.ml]: - -{[ - open Ppxlib - - let generate_impl ~ctxt (rec_flag, type_declarations) = - ... - - let generate_intf ~ctxt (rec_flag, type_declarations) = - ... - - let impl_generator = Deriving.Generator.V2.make_noarg generate_impl - - let intf_generator = Deriving.Generator.V2.make_noarg generate_intf - - let my_deriver = - Deriving.add - "my_deriver" - ~str_type_decl:impl_generator - ~sig_type_decl:intf_generator -]} - -The call to {{!Ppxlib.Deriving.add}[Deriving.add]} is how you'll let [ppxlib] -know about your deriver. The first string argument is the name of the deriver as -referred to by your users, in the above example one would add a [[@@deriving -my_deriver]] annotation to use this plugin. Here our deriver can be used on type -declarations, be it in structures or signatures (i.e. 
implementation or -interfaces, [.ml] or [.mli]). - -To add a deriver you first have to define a generator. You need one for each -node you want to derive code from. Here we just need one for type declarations -in structures and one for type declarations in signatures. To do that you need -the -{{!Ppxlib.Deriving.Generator.V2.make_noarg}[Deriving.Generator.V2.make_noarg]} -constructor. You'll note that there exists -{{!Ppxlib.Deriving.Generator.V2.make}[Deriving.Generator.V2.make]} variant if -you wish to allow passing arguments to your deriver but to keep this tutorial -simple we won't cover this here. The only mandatory argument to the constructor -is a function which takes a labelled -{{!Ppxlib.Expansion_context.Deriver.t}[Expansion_context.Deriver.t]}, an -['input_ast] and returns an ['output_ast] and that will give us a [('output_ast, -'input_ast) Deriving.Generator.t]. Much like the [expand] function described in -the section about extension rewriters, this function is where the actual -implementation for your deriver lives. The [str_type_decl] argument of -{{!Ppxlib.Deriving.add}[Deriving.add]} expects a [(structure, rec_flag * -type_declaration list) Generator.t] so our [generate_impl] function must take a -pair [(rec_flag, type_declaration list)] and return a [structure] i.e. a -[structure_item list], for instance a list of function or module declaration. -The same goes for the [generate_intf] function except that it must return a -[signature]. It is often the case that a deriver has a generator for both the -structure and signature variants of a node. That allows users to generate the -signature corresponding to the code generated by the deriver in their [.ml] -files instead of having to type it and maintain it themselves. - -If you want to look at a concrete example of deriver you can find one in the -[examples/] folder of the [ppxlib] repository -{{:https://github.com/ocaml-ppx/ppxlib/tree/main/examples/simple-deriver}here}. 
- -{2 Metaquot} - -[metaquot] is a PPX plugin that helps you write PPX plugins. It lets you write -AST node values using the actual corresponding OCaml syntax instead of building -them with the more verbose AST types or [Ast_builder]. - -To use [metaquot] you need to add it to the list of preprocessor for your PPX -plugin: - -{[ - (library - (name my_plugin_lib) - (preprocess (pps ppxlib.metaquot))) -]} - -[metaquot] can be used both to write expressions of some of the AST types or to -write patterns to match over those same types. The various extensions it exposes -can be used in both contexts, expressions or patterns. - -The extension you should use depends on the type of AST node you're trying to -write or to pattern-match over. You can use the following extensions with the -following syntax: - -- [expr] for {{!Ppxlib.Parsetree.expression}[Parsetree.expression]}: - [[%expr 1 + 1]] -- [pat] for {{!Ppxlib.Parsetree.pattern}[Parsetree.pattern]}: [[%pat? ("", _)]] -- [type] for {{!Ppxlib.Parsetree.core_type}[Parsetree.core_type]}: - [[%type: int -> string]] -- [stri] for {{!Ppxlib.Parsetree.structure_item}[Parsetree.structure_item]}: - [[%stri let a = 1]] -- [sigi] for {{!Ppxlib.Parsetree.signature_item}[Parsetree.signature_item]}: - [[%sigi: val i : int]] -- [str] and [sig] respectively for - {{!Ppxlib.Parsetree.structure}[Parsetree.structure]} - and {{!Ppxlib.Parsetree.signature}[Parsetree.signature]}. They use similar - syntax to the [_item] extensions above as they are just a list of such items. 
- -If you consider the first example [[%expr 1 + 1]], in an expression context, -[metaquot] will actually expand it into: - -{[ - { - pexp_desc = - (Pexp_apply - ({ - pexp_desc = (Pexp_ident { txt = (Lident "+"); loc }); - pexp_loc = loc; - pexp_attributes = [] - }, - [(Nolabel, - { - pexp_desc = (Pexp_constant (Pconst_integer ("1", None))); - pexp_loc = loc; - pexp_attributes = [] - }); - (Nolabel, - { - pexp_desc = (Pexp_constant (Pconst_integer ("1", None))); - pexp_loc = loc; - pexp_attributes = [] - })])); - pexp_loc = loc; - pexp_attributes = [] - } -]} - -For this to compile you need the AST types to be in the scope so you should -always use [metaquot] where [Ppxlib] is opened. You'll also note that the -generated node expects a [loc : Location.t] value to be available. The produced -AST node value and every other nodes within it will be located to [loc]. You -should make sure [loc] is the location you want for your generated code when -using [metaquot]. - -When using the pattern extension, it will produce a pattern that matches no -matter what the location and attributes are. For the previous example for -instance, it will produce the following pattern: - -{[ - { - pexp_desc = - (Pexp_apply - ({ - pexp_desc = (Pexp_ident { txt = (Lident "+"); loc = _ }); - pexp_loc = _; - pexp_attributes = _ - }, - [(Nolabel, - { - pexp_desc = (Pexp_constant (Pconst_integer ("1", None))); - pexp_loc = _; - pexp_attributes = _ - }); - (Nolabel, - { - pexp_desc = (Pexp_constant (Pconst_integer ("1", None))); - pexp_loc = _; - pexp_attributes = _ - })])); - pexp_loc = _; - pexp_attributes = _ - } -]} - -Using these extensions alone, you can only produce constant/static AST nodes. -You can't bind variables in the generated patterns either. [metaquot] has a -solution for that as well: anti-quotation. You can use anti-quotation to insert -any expression or pattern representing an AST node. 
That way you can include -dynamically generated nodes inside a [metaquot] expression extension point or -use a wildcard or variable pattern in a pattern extension. - -Consider the following example: - -{[ - let with_suffix_expr ~loc s = - let dynamic_node = Ast_builder.Default.estring ~loc s in - [%expr [%e dynamic_node] ^ "some_fixed_suffix"] -]} - -The [with_suffix_expr] function will create an [expression] which is the -concatenation of the [s] argument and the fixed suffix. I.e. [with_suffix_expr -"some_dynamic_stem"] is equivalent to [[%expr "some_dynamic_steme" ^ -"some_fixed_suffix"]]. - -Similarly if you want to ignore some parts of AST nodes and extract some others -when pattern-matching over them, you can use anti-quotation: - -{[ - match some_expr_node with - | [%expr 1 + [%e? _] + [%e? third]] -> do_something_with third -]} - -The syntax for anti-quotation depends on the type of the node you wish to insert: - -- [e] to anti-quote values of type - {{!Ppxlib.Parsetree.expression}[Parsetree.expression]}: - [[%expr 1 + [%e some_expr_node]]] -- [p] to anti-quote values of type - {{!Ppxlib.Parsetree.pattern}[Parsetree.pattern]}: - [[%pat? (1, [%p some_pat_node])]] -- [t] to anti-quote values of type - {{!Ppxlib.Parsetree.core_type}[Parsetree.core_type]}: - [[%type: int -> [%t some_core_type_node]]] -- [m] to anti-quote values of type - {{!Ppxlib.Parsetree.module_expr}[Parsetree.module_expr]} - or {{!Ppxlib.Parsetree.module_type}[module_type]}: - [[%expr let module M = [%m some_module_expr_node]]] or - [[%sigi: module M : [%m some_module_type_node]]] -- [i] to anti-quote values of type - {{!Ppxlib.Parsetree.structure_item}[Parsetree.structure_item]} or - {{!Ppxlib.Parsetree.signature_item}[signature_item]}: - [[%str let a = 1 [%%i some_structure_item_node]]] or - [[%sig: val a : int [%%i some_signature_item_node]]] - -Note that when anti-quoting in a pattern context you must always use the [?] 
in -the anti-quotation extension as its payload should always be a pattern the same -way it must always be an expression in an expression context. - -As you may have noticed, you can anti-quote expressions which type differs from -the type of the whole [metaquot] extension point. E.g. you can write: - -{[ - let structure_item = - [%stri let [%p some_pat] : [%t some_type] = [%e some_expr]] -]} - -{2 Handling errors} - - -In order to give a nice user experience when reporting errors or failures in a ppx, it is necessary to include as much of the generated content as possible. Most IDE tools, such as Merlin, rely on the AST for their features, such as displaying type, jumping to definition or showing the list of errors. - -{3 Embedding the errors in the AST} - -A common way to report an error is to throw an exception. However, this method interrupts the execution flow of the ppxlib driver and leaves later PPX's unexpanded when handing the AST over to merlin. - -Instead, it is better to always return a valid AST, as complete as possible, but with "error extension nodes" at every place where successful code generation was impossible. Error extension nodes are special extension nodes [[%ocaml.error error_message]], which can be embedded into a valid AST and are interpreted later as errors, for instance by the compiler or Merlin. As all extension nodes, they can be put {{:https://ocaml.org/manual/extensionnodes.html}at many places in the AST}, to replace for instance structure items, expressions or patterns. - -So whenever you're in doubt if to throw an exception or if to embed the error as an error extension node when writing a ppx rewriter, the answer is most likely: embed the error is the way to go! And whenever you're in doubt about where exactly inside the AST to embed the error, a good rule of thumb is: as deep in the AST as possible. 
- -For instance, suppose a rewriter is supposed to define a new record type, but there is an error in the generation of the type of one field. In order to have the most complete AST as output, the rewriter can still define the type and all of its fields, putting an extension node in place of the type of the faulty field: - -{[ - type long_record = { - field_1: int; - field_2: [%ocaml.error "field_2 could not be implemented due to foo"]; - } -]} - -Ppxlib provides a function in its API to create error extension nodes: {{!Ppxlib.Location.error_extensionf}[error_extensionf]}. This function creates an extension node, which has then to be transformed in the right kind of node using functions such as for instance {{!Ppxlib.Ast_builder.Default.pexp_extension}[pexp_extension]}. - -{3 A documented example} - -Let us give an example. We will define a deriver on types records, which constructs a default value from a given type. For instance, the derivation on the type [type t = { x:int; y: float; z: string}] would yield [let default_t = {x= 0; y= 0.; z= ""}]. This deriver has two limitations: - -{ol -{- It does not work on other types than records,} -{- It only works for records containing fields of type [string], [int] or [float].} -} - -The rewriter should warn the user about these limitations with a good error reporting. Let us first look at the second point. Here is the function mapping the fields from the type definition to a default expression. 
- -{[ - let create_record ~loc fields = - let declaration_to_instantiation (ld : label_declaration) = - let loc = ld.pld_loc in - let { pld_type; pld_name; _ } = ld in - let e = - match pld_type with - | { ptyp_desc = Ptyp_constr ({ txt = Lident "string"; _ }, []); _ } -> - pexp_constant ~loc (Pconst_string ("", loc, None)) - | { ptyp_desc = Ptyp_constr ({ txt = Lident "int"; _ }, []); _ } -> - pexp_constant ~loc (Pconst_integer ("0", None)) - | { ptyp_desc = Ptyp_constr ({ txt = Lident "float"; _ }, []); _ } -> - pexp_constant ~loc (Pconst_float ("0.", None)) - | _ -> - pexp_extension ~loc - @@ Location.error_extensionf ~loc - "Default value can only be derived for int, float, and string." - in - ({ txt = Lident pld_name.txt; loc }, e) - in - let l = List.map fields ~f:declaration_to_instantiation in - pexp_record ~loc l None -]} - - -When the record definition contains several fields with types other than [int], [float] or [string], several error nodes are added in the AST. Moreover, the location of the error nodes corresponds to the definition of the record fields. This allows tools such as Merlin to report all errors at once, at the right location, resulting in a better workflow than having to recompile everytime one error is corrected to see the next one. - -The first limitation is that the deriver cannot work on non record types. However, we decided here to derive a default value even in the case of non-record types, so that it does not appear as undefined in the remaining of the file. This impossible value consists of an error extension node. 
- -{[ - let generate_impl ~ctxt (_rec_flag, type_declarations) = - let loc = Expansion_context.Deriver.derived_item_loc ctxt in - List.map type_declarations ~f:(fun (td : type_declaration) -> - let e, name = - match td with - | { ptype_kind = Ptype_record fields; ptype_name; ptype_loc; _ } -> - (create_record ~loc:ptype_loc fields, ptype_name) - | { ptype_name; ptype_loc; _ } -> - ( pexp_extension ~loc - @@ Location.error_extensionf ~loc:ptype_loc - "Cannot derive accessors for non record type %s" - ptype_name.txt, - ptype_name ) - in - [ - pstr_value ~loc Nonrecursive - [ - { - pvb_pat = ppat_var ~loc { txt = "default_" ^ name.txt; loc }; - pvb_expr = e; - pvb_attributes = []; - pvb_loc = loc; - }; - ]; - ]) - |> List.concat -]} - -{3 In case of panic} - -In some rare cases, it might happen that a whole file rewriter is not able to output a meaningful AST. In this case, they might be tempted to raise a located error: an exception that includes the location of the error. Moreover, this h as historically been what was suggested to do by ppxlib examples, but is now discouraged in most of the cases, as it prevents Merlin features to work well. - -If such an exception is uncaught, the ppx driver will return with an error code and the exception will be pretty-printed, including the location (that's the case when the driver is called by dune). When the driver is spawned with the [-embed-errors] or [-as-ppx] flags (that's the case when the driver is called by merlin), the driver will look for located error. If it catches one, it will stop its chain of rewriting at this point, and output an AST consisting of the located error followed by the last valid AST: the one passed to the raising rewriter. - -Even more in context-free rewriters, raising should be avoided, in favour of outputting a single error node when a finer grained reporting is not needed or possible. 
As the whole context-free rewriting is done in one traverse of the AST, a single raise will cancel both the context-free pass and upcoming rewriters, and the AST prior to the context-free pass will be outputted together with the error. - -The function provided by the API to raise located errors is {{!Ppxlib.Location.raise_errorf}[raise_errorf]}. - -{3 Migrating from raising to embedding errors} - -Lots of ppx-es exclusively use {{!Ppxlib.Location.raise_errorf}[raise_errorf]} to report errors, instead of the more merlin friendly way consisting of embedding errors in the AST, as described in this section. - -If you want to migrate such a codebase to the embedding approach, here are a few recipes to do that. Indeed, it might not be completely trivial, as raising can be done anywhere in the code, including in places where "embedding" would not make sense. What you can do is to turn your internal raising functions to function returning a [result] type. - -The workflow for this change would look like this: - -{ol -{- Search through your code all uses of {{!Ppxlib.Location.raise_errorf}[raise_errorf]}, using for instance [grep].} -{- For each of them, turn them into function returning a [(_, extension) result] type, using {{!Ppxlib.Location.error_extensionf}[error_extensionf]} to generate the [Error].} -{- Let the compiler or merlin tell you where you need to propagate the [result] type (most certainly using [map]s and [bind]s).} -{- When you have propagated until a point where you can, embed the extension in case of [Error extension].} -} - -This is quite convenient, as it allows you to do a "type-driven" modification, using at full the static analysis of OCaml to never omit a special case, and to confidently find the place the most deeply in the AST to embed the error. However, it might induces quite a lot of code modification, and exceptions are sometimes convenient to use, depending on the taste. 
In case you want to do only a very simple to keep using exception, catch them and turn them into extension points embedded in the AST, here is an example: - -{[ -let rewrite_extension_point loc payload = - try generate_ast payload - with exn -> - let get_error exn = - match Location.Error.of_exn exn with - | None -> raise exn - | Some error -> error - in - let extension = exn |> get_error |> Location.Error.to_extension in - Ast_builder.Default.pstr_extension ~loc ext [] -]} diff --git a/doc/matching-code.mld b/doc/matching-code.mld new file mode 100644 index 00000000..ef722c96 --- /dev/null +++ b/doc/matching-code.mld @@ -0,0 +1,519 @@ +{%html:
%}{{!"generating-code"}< Generating AST nodes}{%html:
%}{{!"ast-traversal"}Traversing the AST >}{%html:
%} + +{0 Destructing AST Nodes} + +In the previous chapter, we have seen how to generate code. However, the +transformation function should depend on its input (the payload and maybe the +derived item), which we have to be able to inspect. + +Once again, directly inspecting the {{!Ppxlib.Parsetree}[Parsetree]} value that +we get as input is not a good option because it is very big to manipulate and can +break at every new OCaml release. For instance, let's consider the case of +{{:https://github.com/janestreet/ppx_inline_test}[ppx_inline_test]}. We want to +recognize and extract the name and expression only from the form patterns: + +{[ + [%%test let "name" = expr] +]} + +If we wrote a function accepting the payload of [[%%test]], and extracting the +name and expression from it, using normal pattern matching we would have: + +{[ + # let match_payload ~loc payload = + match payload with + | PStr + [ + { + pstr_desc = + Pstr_value + ( Nonrecursive, + [ + { + pvb_pat = + { + ppat_desc = + Ppat_constant (Pconst_string (name, _, None)); + _; + }; + pvb_expr = expr; + _; + }; + ] ); + _; + }; + ] -> + Ok (name, expr) + | _ -> Error (Location.Error.createf ~loc "Wrong pattern") ;; + val match_payload : + loc:location -> payload -> (string * expression, Location.Error.t) result = +]} + +[ppxlib]'s solution to the verbosity and stability problem is to provide helpers +to {e match} the AST, in a very similar way to what it does for generating AST +nodes. + +{1 The Different Options} + +In this chapter, we will often mention the similarities between matching code +and generating code (from the {{!"generating-code"}previous chapter}). Indeed, the +options provided by [ppxlib] to match AST nodes mirror the ones for generating +nodes: + +- {{!Ppxlib.Ast_pattern}[Ast_pattern]}, the {{!Ppxlib.Ast_builder}[Ast_builder]} sibling, +- {{!Ppxlib_metaquot}[Metaquot]} again. 
+ +{{!Ppxlib.Ast_pattern}[Ast_pattern]} is used in {{!Ppxlib.Extension.V3.declare}[Extension.V3.declare]}, so you will need it to write +extenders. {!Ppxlib_metaquot} is, as for generating nodes, more natural to use but also +restricted to some cases. + +{1:ast_pattern_intro The [Ast_pattern] Module} + +A match is a "structural destruction" of a value into multiple subvalues to +continue the computation. For instance, in the example above from the single +variable [payload], we structurally extract two variables: [name] and [expr]. + +Destruction is very similar to construction, but in reverse. Instead of using +several values to build a bigger one, we use one big value to define smaller +ones. As an illustration, note how in OCaml the following construction and +destruction are close: + +{[ + let big = { x ; y } (** Construction from [x] and [y] *) + let { x ; y } = big (** Destruction recovering [x] and [y] *) +]} + +For the same reason, building AST nodes using {{!Ppxlib.Ast_builder}[Ast_builder]} and destructing AST +nodes using {{!Ppxlib.Ast_pattern}[Ast_pattern]} look very similar. The difference is that in the construction "leaf," {{!Ppxlib.Ast_builder}[Ast_builder]} uses actual values, while {{!Ppxlib.Ast_pattern}[Ast_pattern]} has +"wildcards" at the leafs. + +Consider the example in the introduction matching [[%%test let "name" = expr]]. 
+Building such an expression with {{!Ppxlib.Ast_builder}[Ast_builder]} could look like: + +{[ + # let build_payload_test ~loc name expr = + let (module B) = Ast_builder.make loc in + let open B in + Parsetree.PStr + (pstr_value Nonrecursive + (value_binding ~pat:(pstring name) ~expr :: []) + :: []) ;; + val build_payload_test : + loc:location -> string -> expression -> payload = + +]} + +Constructing a first-class pattern is almost as simple as replacing +[Ast_builder] with [Ast_pattern], as well as replacing the base values [name] and [expr] with a +capturing wildcard: + +{[ + # let destruct_payload_test () = + let open Ast_pattern in + pstr + (pstr_value nonrecursive + (value_binding ~pat:(pstring __) ~expr:__ ^:: nil) + ^:: nil) ;; + val destruct_payload_test : + unit -> (payload, string -> expression -> 'a, 'a) Ast_pattern.t = + +]} + +Note that to facilitate viewing the similarity, we wrote [[v]] as [v :: []], and +we added a [unit] argument to avoid +{{:https://v2.ocaml.org/manual/polymorphism.html#ss:valuerestriction}value +restriction} to mess with the type (that we explained right in the next section). + +{2 The Type for Patterns} + +The {{!Ppxlib.Ast_pattern.t}[Ast_pattern.t]} type reflects the fact that a pattern-match or destruction +is taking a value, extracting other values from it, and using them to finally +output something. So, a value [v] of type [(matched, cont, res) Ast_pattern.t] +means that: + +- The type of values matched by [v] is [matched]. For instance, [matched] could + be {{!Ppxlib.Parsetree.payload}[payload]}. +- The continuation (what to do with the extracted values) has type [cont]. The + values extracted from the destruction are passed as an argument to the + continuation, therefore [cont] includes information about them. For instance, + for a pattern that captures an [int] and a [string], [cont] could be + [int -> string -> structure]. The continuation is not part of [v]; it will + be given with the value to match. 
+- The result of the computation has type [res]. Note that this is information in addition + to what we have in [cont]: {{!Ppxlib.Ast_pattern.map_result}[Ast_pattern.map_result]} + allows mapping the continuation result through a function! This allows users to add a + "construction" post-processing to the continuation. A value of type + [(pattern, int -> int, expression) Ast_pattern.t] would contain how to extract an integer from a [pattern] and how to map a modified [int] into an [expression]. + + +In the case of the example above, [destruct_payload_test] has type: +{[ + # destruct_payload_test ;; + val destruct_payload_test : + (payload, string -> expression -> 'a, 'a) Ast_pattern.t = + +]} + as it destructs values +of type [payload] and extracts two values, respectively, of type [string] and +[expression], so the continuation has type [string -> expression -> 'a]. Then the +result type is ['a] since no mapping on the result is made. Now that the type of {{!Ppxlib.Ast_pattern.t}[Ast_pattern.t]} is explained, the type of +{{!Ppxlib.Ast_pattern.parse_res}[Ast_pattern.parse_res]}, the function for applying patterns, should make sense: + +{@ocaml[ + # Ast_pattern.parse_res ;; + val parse_res : + ( 'matched, 'cont, 'res ) t -> + Location.t -> + ?on_error:( unit -> 'res) -> + 'matched -> + 'cont -> + ( 'res, Location.Error.t Stdppx.NonEmptyList.t ) result = + +]} + +This function takes a pattern expecting values of type ['matched], continuations of +type ['cont] and outputs values of type [('res, _) result] (where the error case is when the ['matched] value does not have the expected structure). +The types of the function's other arguments correspond to this understanding: the argument of type ['matched] is +the value to match, the one of type ['cont] is the continuation, and the result +of applying the pattern to those two values is of type [('res, _) result]! 
+ +Composing construction and destruction yields the identity: + +{@ocaml[ + # let f name expr = + Ast_pattern.parse_res + (destruct_payload_test ()) Location.none + (build_payload_test ~loc name expr) + (fun name expr -> (name, expr)) ;; + val f : + string -> + expression -> + (string * expression, _) result = + # f "name" [%expr ()] ;; + Ok + ("name", + {pexp_desc = + Pexp_construct + ({txt = Lident "()"; + ...}...)...}...) +]} + +While the {{!Ppxlib.Ast_pattern.parse_res}[Ast_pattern.parse_res]} function is useful to match an AST node, you +will also need the {{!Ppxlib.Ast_pattern.t}[Ast_pattern.t]} value in other contexts. For instance, it is +used when declaring extenders with {{!Ppxlib.Extension.declare}[Extension.declare]} to tell how to extract +arguments from the payload to give them to the extender, or when parsing with {{!Ppxlib.Deriving.Args.arg}deriving arguments}. + +{2 Building Patterns} + +Now that we know what these patterns represent and how to use them, and have seen an +example in the {{!ast_pattern_intro}introduction} on {{!Ppxlib.Ast_pattern}[Ast_pattern]}, the +combinators in the {{!Ppxlib.Ast_pattern}API} should be much more easily +understandable. So, for a comprehensive list of the different values in the +module, the reader should directly refer to the API. In this guide, however, we +explain in more detail a few important values with examples. + +{b The wildcard pattern [| x -> ]}. The simplest way to extract a value from something +is just to return it! In {{!Ppxlib.Ast_pattern}[Ast_pattern]}, it corresponds to the value +{{!Ppxlib.Ast_pattern.__}[__]} (of type [('a, 'a -> 'b, 'b) Ast_pattern.t]), which extracts the +value it's given: {{!Ppxlib.Ast_pattern.parse_res}matching} a value [v] +with this pattern and a continuation [k] would simply call [k v]. + +This pattern is useful in combination with other combinators. + +{b The wildcard-dropping pattern [| _ -> ]}. 
Despite the resemblance of their names, +{{!Ppxlib.Ast_pattern.__}[__]} is very different from the OCaml pattern-match +wildcard [_], which accepts everything but {e ignores} its input. In {{!Ppxlib.Ast_pattern}[Ast_pattern]}, +the wildcard-dropping pattern is {{!Ppxlib.Ast_pattern.drop}[drop]}. Again, it +is useful in conjunction with other combinators, where one needs +to accept all input in some places, but the value is not relevant. + +{b The [| p as name -> ] combinator}. The combinator {{!Ppxlib.Ast_pattern.as__}[as__]} +allows passing a node to the continuation while still extracting values from +this node. For instance, [as__ (some __)] corresponds to the OCaml pattern-match +[ Some n2 as n1], where the continuation is called with [k n1 n2]. + +{b The [| (p1 | p2) -> ] combinator}. The combinator {{!Ppxlib.Ast_pattern.alt}[alt]} +combines two patterns with the same type for extracted values into one pattern +by first trying to apply the first, and if it fails, by applying the second one. +For instance, [alt (pair (some __) drop) (pair drop (some __))] corresponds to +the OCaml pattern [(Some a, _) | (_, Some a)]. + +{b The constant patterns [| "constant" -> ]}. Using {{!Ppxlib.Ast_pattern.cst}[Ast_pattern.cst]} it is +possible to create patterns matching only fixed values, such as the ["constant"] +string. No values are extracted from this matching. The functions for creating +such values are {{!Ppxlib.Ast_pattern.int}[Ast_pattern.int]}, {{!Ppxlib.Ast_pattern.string}[Ast_pattern.string]}, {{!Ppxlib.Ast_pattern.bool}[Ast_pattern.bool]}, ... + +{b The common deconstructors}. Many common constructors have +"deconstructors" in {{!Ppxlib.Ast_pattern}[Ast_pattern]}. For instance: +- [some __] corresponds to [Some a], +- [__ ^:: drop ^:: nil] corresponds to [a :: _ :: []], +- [pair __ __] (or equivalently [__ ** __]) corresponds to [(a,b)], etc. + +{b The Parsetree deconstructors}. 
All constructors from {{!Ppxlib.Ast_builder}[Ast_builder]} have a +"deconstructor" in {{!Ppxlib.Ast_pattern}[Ast_pattern]} with the same name. For instance, since +{{!Ppxlib.Ast_builder}[Ast_builder]} has a constructor {{!Ppxlib.Ast_builder.Default.pstr_value}[pstr_value]} to build a structure +item from a [rec_flag] and a [value_binding] list, {{!Ppxlib.Ast_pattern}[Ast_pattern]} has an equally +named {{!Ppxlib.Ast_pattern.pstr_value}[pstr_value]} which, given ways to destruct rec flags and +[value_binding] lists, creates a destructor for structure items. + +{b The continuation modifiers}. Many {{!Ppxlib.Ast_pattern}[Ast_pattern]} values allow modifying the +continuation. It can be a map on the continuation itself, the argument to the +continuation, or the result of the continuation. So, {{!Ppxlib.Ast_pattern.map}[Ast_pattern.map]} transforms the +continuation itself, e.g., [map ~f:Fun.flip] will switch the arguments of +the function. {{!Ppxlib.Ast_pattern.map1}[map<i>]} modifies the arguments to a +continuation of arity [i]: [map2 ~f:combine] is equivalent to +[map ~f:(fun k -> (fun x y -> k (combine x y)))]. Finally, {{!Ppxlib.Ast_pattern.map_result}[Ast_pattern.map_result]} modifies +the continuation's result, and [map_result ~f:ignore] would ignore the continuation's result. + +{b Common patterns}. Some patterns are sufficiently common that, although they can be built from smaller bricks, they are already defined in {{!Ppxlib.Ast_pattern}[Ast_pattern]}. For instance, matching a single expression in a payload is given as {{!Ppxlib.Ast_pattern.single_expr_payload}[Ast_pattern.single_expr_payload]}. 
+ +{2:pattern_examples Useful patterns and examples} + +Below is a list of patterns that are commonly needed when using {{!Ppxlib.Ast_pattern}[Ast_pattern]}: + +{@ocaml[ + open Ast_pattern +]} + +- A pattern to extract an expression from an extension point payload: + +{@ocaml[ + # let extractor () = single_expr_payload __ ;; + val extractor : unit -> (payload, expression -> 'a, 'a) t = +]} + +- A pattern to extract a string from an extension point payload: + +{@ocaml[ + # let extractor () = single_expr_payload (estring __) ;; + val extractor : unit -> (payload, string -> 'a, 'a) t = +]} + +- A pattern to extract a pair [int * float] from an extension point payload: + +{@ocaml[ + # let extractor () = single_expr_payload (pexp_tuple (eint __ ^:: efloat __ ^:: nil)) ;; + val extractor : unit -> (payload, int -> string -> 'a, 'a) t = +]} + +- A pattern to extract a list of integers from an extension point payload, given + as a tuple (of unfixed length): + +{@ocaml[ + # let extractor () = single_expr_payload (pexp_tuple (many (eint __))) ;; + val extractor : unit -> (payload, int list -> 'a, 'a) t = +]} + +- A pattern to extract a list of integers from an extension point payload, given + as a list: + +{@ocaml[ + # let extractor () = single_expr_payload (elist (eint __)) ;; + val extractor : unit -> (payload, int list -> 'a, 'a) t = +]} + +- A pattern to extract the [pattern] and the [expression] in a let-binding, from a structure item: + +{@ocaml[ + # let extractor_in_let () = pstr_value drop ((value_binding ~pat:__ ~expr:__) ^:: nil);; + val extractor_in_let : unit -> (structure_item, pattern -> expression -> 'a, 'a) t = + +]} + +- A pattern to extract the [pattern] and the [expression] in a let-binding, from an extension point payload: + +{@ocaml[ + # let extractor () = pstr @@ extractor_in_let () ^:: nil;; + val extractor : unit -> (payload, pattern -> expression -> 'a, 'a) t = +]} + +- A pattern to extract a core type, from an extension point payload (with a colon 
in the extension node, such as [[%ext_name: core_type]]): + +{@ocaml[ + # let extractor () = ptyp __ ;; + val extractor : unit -> (payload, core_type -> 'a, 'a) t = +]} + +- A pattern to extract a string from an expression, either from an identifier or from a string. That is, it will extract the string ["foo"] from both the AST nodes [foo] and ["foo"]. + +{@ocaml[ + # let extractor () = alt (pexp_ident (lident __)) (estring __) ;; + val extractor : unit -> (expression, string -> 'a, 'a) t = +]} + +- A pattern to extract a sequence of two idents, as strings (will extract ["foo"], ["bar"] from [[%ext_name foo bar]]): + +{@ocaml[ + let extractor () = + single_expr_payload @@ + pexp_apply + (pexp_ident (lident __)) + ((no_label (pexp_ident (lident __))) ^:: nil) ;; + val extractor : unit -> (payload, string -> string -> 'a, 'a) t = ]} + +{1:metaquot [Metaquot]} +{2 [Metaquot] for Patterns} + +Recall that [ppxlib] provides a rewriter to generate code explained in +{{!page-"generating-code".metaquot}the corresponding chapter}. The same PPX can +also generate patterns when the extension nodes are used as patterns: for +instance, in what follows, the extension node will be replaced by a value of {{!Ppxlib.Parsetree.expression}[expression]} type: + +{[ + let f = [%expr 1 + 1] +]} + +While in the following, it would be replaced by a pattern matching on values of {{!Ppxlib.Parsetree.expression}[expression]} type: + +{[ + let f x = match x with + | [%expr 1 + 1] -> ... + | _ -> ... +]} + +The produced pattern matches regardless of location and attributes. 
For +the previous example, it will produce the following pattern: + +{[ + { + pexp_desc = + (Pexp_apply + ({ + pexp_desc = (Pexp_ident { txt = (Lident "+"); loc = _ }); + pexp_loc = _; + pexp_attributes = _ + }, + [(Nolabel, + { + pexp_desc = (Pexp_constant (Pconst_integer ("1", None))); + pexp_loc = _; + pexp_attributes = _ + }); + (Nolabel, + { + pexp_desc = (Pexp_constant (Pconst_integer ("1", None))); + pexp_loc = _; + pexp_attributes = _ + })])); + pexp_loc = _; + pexp_attributes = _ + } +]} + +While being less general than {{!Ppxlib.Ast_pattern}[Ast_pattern]}, this allows users to write +patterns in a more natural way. Due to the OCaml AST, {{!Ppxlib.Parsetree.payload}payloads} can only +take the form of a {{!Ppxlib.Parsetree.structure}[structure]}, a {{!Ppxlib.Parsetree.signature}[signature]}, a {{!Ppxlib.Parsetree.core_type}[core type]}, or a {{!Ppxlib.Parsetree.pattern}[pattern]}. We might +want to generate pattern matching for other kinds of nodes, such as expressions or +structure item. The same extension nodes that [Metaquot] provides +for building can be used for matching: + +- The [expr] extension node to match on {{!Ppxlib.Parsetree.expression}[expressions]}: + {[match expr with [%expr 1 + 1] -> ...]} +- The [pat] extension node to match on {{!Ppxlib.Parsetree.pattern}[patterns]}: +{[match pattern with [%pat? ("", _)] -> ...]} +- The [type] extension node to match on for {{!Ppxlib.Parsetree.core_type}[core types]}: + {[match typ with [%type: int -> string] -> ...]} +- The [stri] and [sigi] extension nodes to match on {{!Ppxlib.Parsetree.structure_item}[structure_item]} and {{!Ppxlib.Parsetree.signature_item}[signature_item]}: + {[match stri with [%stri let a = 1] -> ... + match sigi with [%sigi: val a : int] -> ...]} +- The [str] and [sig] extension nodes to match on + {{!Ppxlib.Parsetree.structure}[structure]} + and {{!Ppxlib.Parsetree.signature}[signature]}. 
+ {[ +let _ = + match str with + | [%str + let a = 1 + let b = 2.1] -> + () + +let _ = + match sig_ with + | [%sig: + val a : int + val b : float] -> + () + +]} + +{2:antiquotations Anti-Quotations} + +{{!page-"generating-code".antiquotations}Similarly} to the [expression] context, these extension nodes have a limitation: when using these extensions alone, you can't +bind variables. [Metaquot] also solves +this problem using anti-quotation. +In the [pattern] context, anti-quotation is not used to insert values but to insert +patterns. That way you can include a wildcard or variable-binding pattern. + +Consider the following example, which matches expression nodes corresponding to +the sum of three expressions: starting with the constant 1, followed by anything, +followed by anything bound to the [third] variable, which has type +[expression]: + +{[ + match some_expr_node with + | [%expr 1 + [%e? _] + [%e? third]] -> do_something_with third +]} + +The syntax for anti-quotation depends on the type of the node you wish to insert +(which must also correspond to the context of the anti-quotation extension node): + +- The extension point [e] is used to anti-quote values of type + {{!Ppxlib.Parsetree.expression}[expression]}: + {[match e with [%expr 1 + [%e? some_expr_pattern]] -> ...]} +- The extension point [p] is used to anti-quote values of type + {{!Ppxlib.Parsetree.pattern}[pattern]}: + {[match stri with [%stri let [%p? x] = [%e? y]] -> do_something_with x y]} +- The extension point [t] is used to anti-quote values of type + {{!Ppxlib.Parsetree.core_type}[core_type]}: + {[match t with [%type: int -> [%t? _]] -> ...]} +- The extension point [m] is used to anti-quote values of type + {{!Ppxlib.Parsetree.module_expr}[module_expr]} + or {{!Ppxlib.Parsetree.module_type}[module_type]}: + {[ +let [%expr + let module M = [%m? extracted_m] in + M.x] = + some_expr +in +do_something_with extracted_m + +let _ = fun [%sigi: module M : [%m? 
input]] -> do_something_with input +]} +- The extension point [i] is used to anti-quote values of type + {{!Ppxlib.Parsetree.structure_item}[structure_item]} or + {{!Ppxlib.Parsetree.signature_item}[signature_item]}: + {[ +let [%str + let a = 1 + + [%%i? stri2]] = + e +in +do_something_with stri2 +;; + +let [%sig: + val a : int + + [%%i? sigi2]] = + s +in +do_something_with sigi2 +]} + +Remember, since we are inserting patterns (and not expressions), we always use +patterns as payload, as in [[%e? x]]. + +If an anti-quote extension node is in the wrong context, it won't be rewritten +by [Metaquot]. For instance, in [fun [%expr 1 + [%p? x]] -> x] the +anti-quote extension node for patterns is put in an expression context, and +it won't be rewritten. +On the contrary, you should use anti-quotes whose kind ([[%e? ...]], [[%p? ...]]) +matches the context. For example, you should write: + +{@ocaml[ +fun [%stri let ([%p? pat] : [%t? type_]) = [%e? expr]] -> + do_something_with pat type_ expr +]} + +{%html:
%}{{!"generating-code"}< Generating AST nodes}{%html:
%}{{!"ast-traversal"}Traversing the AST >}{%html:
%} diff --git a/doc/quick_intro.mld b/doc/quick_intro.mld new file mode 100644 index 00000000..85d76306 --- /dev/null +++ b/doc/quick_intro.mld @@ -0,0 +1,84 @@ +{%html:
%}{%html:
%}{{!driver}The Driver >}{%html:
%} + +{0 Introduction} + +This guide is intended for authors or future authors of PPX rewriters. If you +don't know what a PPX is, or if you are looking for a guide intended for PPX +users, read the {{:https://ocaml.org/docs/metaprogramming}OCaml +official guide on meta-programming} first, although the beginning of this guide +may be of interest to everyone. + +{1 Preprocessing in OCaml} + +OCaml doesn't have a macro system, that is, there is no official part of the +OCaml language that will be executed at compile time in order to generate or +alter the source code. However, OCaml does have an official part of its syntax dedicated to this: {{:https://v2.ocaml.org/manual/extensionnodes.html}extension nodes} and {{:https://v2.ocaml.org/manual/attributes.html}attributes}, both of them introduced in OCaml 4.02. The preprocessing itself, though, is left to external programs, +written by the community and specialised for their own tasks. However, without a +unification framework, the following issues arise: + +- Ambiguity when using several preprocessors due to lacking clear composition semantics +- Duplication of code and efforts on the different preprocessors +- Performance loss with many phases (parsing, pretty-printing, etc.) being + executed multiple times +- Lack of cross-compiler compatibility +- Incompatibility among the different preprocessors rather than one homogeneous preprocessor ecosystem + +{1 [ppxlib]} + +The goal of [ppxlib] is to solve these problems, by providing a unifying +framework for writing preprocessors. It sits in between the OCaml compiler and +toolchain on one side and the PPX authors on the other, and provides an API for the latter. One could sum up the +[ppxlib] features as: + +- It deals with all boilerplate, such as parsing the input, outputting the rewritten + output, generating an executable, etc. 
+- It generates a single executable for multiple transformations and defines clear composition semantics for local transformations +- It integrates well with Dune and Merlin +- It provides a more stable API than the compiler for manipulating the AST +- A single PPX codebase usually works on several OCaml versions of the AST +- It defines restricted rewriters whose semantics ensure better confidence for the + user and better compositional semantics +- It provides many helpers to pattern-match and generate AST nodes, as well as traverse the + AST. + +{1 This Guide} + +This guide is separated into several parts. + +First, we focus on the {{!driver}driver} that performs the AST transformations: +how it is generated, executed, and more importantly, what it does exactly, from +migrating the AST to the different rewriting phases it goes through. + +After that, we explain {{!"writing-ppxs"}the different kinds} of transformations +that [ppxlib] supports, and how to register them to the driver. This section only +describes the transformations and their properties, not how to actually +manipulate the AST. + +The part where we discuss how to manipulate the AST is split in three pages: +{{!"generating-code"}generating AST nodes} to generate OCaml code, +{{!"matching-code"}destructing AST nodes} to extract information and act +differently depending on what is extracted, and {{!"ast-traversal"}traversing +the AST} to use [fold], [iter], and [map] on the AST. This code-manipulation part +explains using the modules {{!Ppxlib.Ast_builder}[Ast_builder]}, {{!Ppxlib.Ast_pattern}[Ast_pattern]}, and the [ppxlib]'s +PPX {{!Ppxlib_metaquot}[Metaquot]}. + +We finally discuss several {{!"good-practices"}good practices}, such as how to +properly report errors, how to test your PPX, or how to migrate from other PPX +libraries, such as [OMP] and [ppx_deriving]. 
+ +We end by including some {{!"examples"}examples}, which you can also find in the +[examples] {{:https://github.com/ocaml-ppx/ppxlib/tree/main/examples}folder} of +[ppxlib]'s repository. + +{1 PPXs and [ppxlib] History} + +The preprocessing history of OCaml started long before [ppxlib]. However, this +section is not yet written. You can find more information in these resources: +{{:https://lists.ocaml.org/pipermail/wg-camlp4/2013-January/000000.html}1} +{{:https://caml.inria.fr/pub/docs/manual-camlp4/manual002.html}2} +{{:https://camlp5.github.io/}3} +{{:https://discuss.ocaml.org/t/an-update-on-the-state-of-the-ppx-ecosystem-and-ppxlib-s-transition/8200}5}. +You can also contribute to the [ppxlib] documentation by opening a pull request in +the {{:https://github.com/ocaml-ppx/ppxlib/}repository}. + +{%html:
%}{%html:
%}{{!driver}The Driver >}{%html:
%} diff --git a/doc/writing-ppxs.mld b/doc/writing-ppxs.mld new file mode 100644 index 00000000..c8dba831 --- /dev/null +++ b/doc/writing-ppxs.mld @@ -0,0 +1,520 @@ +{%html:
%}{{!"driver"}< The Driver}{%html:
%}{{!"generating-code"}Generating AST nodes >}{%html:
%} + +{0 Writing a Transformation} + +This chapter covers the [ppxlib] procedure basics to define and register a +transformation, be it a global or a context-free transformation. + +For the actual manipulation and generation of code, [ppxlib] provides many helpers +that are listed in {{!"generating-code"}Generating AST nodes}. + +{1 Defining a Transformation} + +For [ppxlib], a transformation is a description of a way to modify a given AST +into another one. A transformation can be: + +- A context-free transformation, which only acts on a portion of the AST. In the [ppxlib] framework, those transformations + are represented by values of type {{!Ppxlib.Context_free.Rule.t}[Context_free.Rule.t]} and are executed in the {{!driver."context-free-phase"}context-free phase}. This is the strongly recommended kind of transformation due to its {{!driver.advantages}important advantages}, such as good performance, well-defined composition semantics, and the safety and trustability that comes with well-isolated and strictly local modifications. +- A global transformation, which takes the simple form of a function of type + [structure -> structure] or [signature -> signature], that can sometimes take + extra information as additional arguments. Such a transformation is applied in + the {{!driver."global-transfo-phase"}global transformation phase}, unless it + has a good reason to have been registered in another phase. While global transformations are a flexible and powerful tool in the OCaml ecosystem, they come with many {{!global_transformation}drawbacks} and should only be used when really necessary. + +In order to register a transformation to the [ppxlib] driver, one should use the +{{!Ppxlib.Driver.V2.register_transformation}[Driver.V2.register_transformation]}. This function is used to register all +rewriter types in every different phase, except derivers, which are abstracted +away in {{!Ppxlib.Deriving}[Deriving]}. 
+ +{1 Context-Free Transformation} + +In [ppxlib], the type for context-free transformation is +{{!Ppxlib.Context_free.Rule.t}[Context_free.Rule.t]}. Rules will be applied during the top-down traversal of the AST +in the context-free pass. A rule contains the information about +when it should be applied in the traversal, as well as the transformation to +apply. + +Currently, rules can only be defined to apply in four different contexts: + +- on extension points, such as [\[%ext_point payload\]] +- on some structure or signature items with an attribute, such as + [type t = Nil \[@@deriving show\]], +- on + {{:https://v2.ocaml.org/manual/extensionsyntax.html#ss:extension-literals} + literals with modifiers}, such as [41g] or [43.2x], +- on function application or identifiers, such as [meta_function "99"] and [meta_constant]. + +In order to define rules on extension points, we will use the {{!Ppxlib.Extension}[Extension]} +module. In order to define rules on attributed items, we will use the +{{!Ppxlib.Deriving}[Deriving]} module. For the two other rules, we will directly use the +{{!Ppxlib.Context_free.Rule}[Context_free.Rule]} module. + +{2 Extenders} + +An {{!driver.def_extenders}extender} is characterised by several things: + +{ul +{li The situation that triggers the rewriting, which consists of two things: + {ul + {li The extension points' name on which it is triggered. For instance, + an extender triggered on [[%name]] would not be triggered on [[%other_name]]} + {li The AST context on which it applies. Indeed, extension points can be used in + many different places: expression, pattern, core type, etc., and the extender + should be restricted to one context, as it produces code of a single type. 
So, + an extender triggered on expressions could be triggered on [let x = [%name]] + but not on [let [%name] = expr].}}} +{li The actual rewriting of the extension node: + {ul + {li A function, called "expander", taking arguments and outputting the generated AST} + {li How to extract from the payload the arguments to pass to the expander}} +}} + +{3:ext_context The Extender Context} + +The context is a value of type {{!Ppxlib.Extension.Context.t}[Extension.Context.t]}. For instance, to +define an extender for expression-extension points, the correct context is +{{!Ppxlib.Extension.Context.expression}[Extension.Context.expression]}. Consult the +{{!Ppxlib.Extension.Context}[Extension.Context]} module's API for the list of all contexts! + +{@ocaml[ + # let context = Extension.Context.expression;; + val context : expression Extension.Context.t = + Ppxlib.Extension.Context.Expression +]} + +{3 The Extender Name} + +The extension point name on which it applies is simply a string. + +{@ocaml[ + # let extender_name = "add_suffix" ;; + val extender_name : string = "add_suffix" +]} + +See below for examples on when the above name and context will trigger rewriting: +{@ocaml[ + (* will trigger rewriting: *) + let _ = [%add_suffix "payload"] + + (* won't trigger rewriting: *) + let _ = [%other_name "payload"] (* wrong name *) + let _ = match () with [%add_suffix "payload"] -> () (* wrong context *) +]} + +{3 The Payload Extraction} + +An extension node contains a {{!Ppxlib.Parsetree.payload}[payload]}, which will be passed to the transformation function. However, while this payload contains all information, it is not always structured the best way for the transformation function. For instance, in [[%add_suffix "payload"]], the string ["payload"] is encoded as a structure item consisting of an expression’s evaluation, a constant that is a string. + +[ppxlib] allows separating the transformation function from the extraction of the payload’s relevant information. 
As explained in depth in the {{!"matching-code"}Destructing AST nodes} chapter, this extraction is done by destructing the payload’s structure (which is therefore restricted: [[%add_suffix 12]] would be refused by the rewriter of the example below). The extraction is defined by a value of type +{{!Ppxlib.Ast_pattern.t}[Ast_pattern.t]}. The {{!Ppxlib.Ast_pattern}[Ast_pattern]} module provides some kind of pattern-matching on AST nodes: a way to structurally extract values from an AST +node in order to generate a value of another kind. + +For instance, a value of type +[(payload, int -> float -> expression, expression) Ast_pattern.t] means that it +defines a way to extract an [int] and a [float] from a {{!Ppxlib.Parsetree.payload}[payload]}, +which should then be combined to define a value of type {{!Ppxlib.Parsetree.expression}[expression]}. + +In our case, the matched value will always be a {{!Ppxlib.Parsetree.payload}[payload]}, as that's the type for extension points' payloads. The type of the +produced node will have to match the {{!ext_context}type of extension node we rewrite}, {{!Ppxlib.Parsetree.expression}[expression]} in our example. + +{@ocaml[ + # let extracter () = Ast_pattern.(single_expr_payload (estring __)) ;; + val extracter : unit -> (payload, string -> 'a, 'a) Ast_pattern.t = +]} + +The above pattern extracts a string from an extension node payload. It will extract ["string"] from the extension node [[%ext_name "string"]] and will refuse [[%ext_name 1+1]]. For other ready-to-use examples of patterns, refer to the {{!"matching-code".pattern_examples}example} section. For more in-depth explanation on the types and functions used above, see the {{!"matching-code"}Destructing AST nodes} chapter and the {{!Ppxlib.Ast_pattern}[Ast_pattern] API}. + +The unit argument in [extracter] is not important. It is added so that {{:https://v2.ocaml.org/manual/polymorphism.html#ss:valuerestriction}value restriction} does not add noise to the type variables. 
+ +{3 The Expand Function} + +The expander is the function that takes the values extracted from the +payload and produces the value that replaces the extension node. + +Building and inspecting AST nodes can be painful due to how +{{!Ppxlib.Parsetree}large} the AST type is. [ppxlib] provides several helper +modules to ease this generation, such as {{!Ppxlib.Ast_builder}[Ast_builder]}, +{!Ppxlib_metaquot}, {{!Ppxlib.Ast_pattern}[Ast_pattern]}, and {{!Ppxlib.Ast_traverse}[Ast_traverse]}, which are +explained in their own chapters: {{!"generating-code"}Generating AST nodes}, +{{!"matching-code"}Destructing AST nodes} and {{!"ast-traversal"}Traversing AST +nodes}. + +In the example below, you can ignore the body of the function until reading +those chapters. + +{@ocaml[ + # let expander ~ctxt s = + let loc = Expansion_context.Extension.extension_point_loc ctxt in + Ast_builder.Default.(estring ~loc (s ^ "_suffixed")) ;; + val expander : ctxt:Expansion_context.Extension.t -> string -> expression = + +]} + +The expander takes [ctxt] as a named argument, used here only to get the location. This +argument corresponds to additional information, such as the location of the +extension node. More precisely, it is of type +{{!Ppxlib.Expansion_context.Extension.t}[Expansion_context.Extension.t]} and +includes: + +- The location of the extension node +- The tool that called the rewriting ([merlin], [ocamlc], [ocaml], + [ocamlopt], etc.) +- The name of the input file given to the driver (see + {{!Ppxlib.Expansion_context.Base.input_name}[Expansion_context.Base.input_name]}) +- The [code_path] (see {{!Ppxlib.Expansion_context.Base.code_path}[Expansion_context.Base.code_path]} and + {{!Ppxlib.Code_path}[Code_path]}) + +{3 Declaring an Extender} + +When we have defined the four prerequisites, we are able to combine all of them to define an +extender using the {{!Ppxlib.Extension.V3.declare}[Extension.V3.declare]} function.
+ +{[ + # V3.declare ;; + string -> + 'context Context.t -> + (payload, 'a, 'context) Ast_pattern.t -> + (ctxt:Expansion_context.Extension.t -> 'a) -> + t +]} + +Note that the type is consistent: the context on which the expander is +applied and the value produced by the expander need to be equal (indeed, +['a] must be of the form ['extracted_1 -> 'extracted_2 -> ... -> 'context] +with the constraints given by {{!Ppxlib.Ast_pattern}[Ast_pattern]}). + +We are thus able to create the extender given by the previous examples: + +{[ + # let my_extender = Extension.V3.declare extender_name context (extracter()) expander ;; + val my_extender : Extension.t = +]} + +Note that we use the [V3] version of the [declare] function, which passes the +expansion context to the expander. Previous versions were kept for +retro-compatibility. + +We can finally turn the extender into a rule (using +{{!Ppxlib.Context_free.Rule.extension}[Context_free.Rule.extension]}) and register it to the driver: + +{[ + # let extender_rule = Context_free.Rule.extension my_extender ;; + val extender_rule : Context_free.Rule.t = + # Driver.register_transformation ~rules:[extender_rule] "name_only_for_debug_purpose" ;; + - : unit = () +]} + +Now, the following: + +{@ocaml[ + let () = print_endline [%add_suffix "helloworld"] +]} + +would be rewritten by the PPX into: + +{@ocaml[ + let () = print_endline "helloworld_suffixed" +]} + +{2 Derivers} + +A {{!driver.def_derivers}deriver} is characterised by several things: + +- The way to parse arguments passed through the attribute payload +- The set of other derivers that need to run before it is applied +- The actual generator function + +Contrary to extenders, the registration of the deriver as a +{{!Ppxlib.Context_free.Rule.t}[Context_free.Rule.t]} is not made by the user via +{{!Ppxlib.Driver.register_transformation}[Driver.register_transformation]}, but +rather by {{!Ppxlib.Deriving.add}[Deriving.add]}.
+ +{4 Derivers Arguments} + +In [ppxlib], a deriver is applied by adding an attribute containing the derivers' names +to apply: + +{[ + type tree = Leaf | Node of tree * tree [@@deriving show, yojson] +]} + +However, it is also possible to pass arguments to the derivers, either through a +record or through labelled arguments: + +{[ + type tree = Leaf | Node of tree * tree [@@deriving my_deriver ~flag ~option1:52] +]} + +or + +{[ + type tree = Leaf | Node of tree * tree [@@deriving my_deriver { flag; option1=52 }] +]} + +The [flag] argument is a flag, and it can only be present or absent but not take a +value. The [option1] argument is a regular argument, so it is also optional but can +take a value. + +In [ppxlib], arguments have the type {{!Ppxlib.Deriving.Args.t}[Deriving.Args.t]}. Similarly to the +{{!Ppxlib.Ast_pattern.t}[Ast_pattern.t]} type, a value of type [(int -> string -> structure, structure) Args.t] +means that it provides a way to extract an integer from the argument and +a string from the options, later combined to create a structure. + +The way to define a {{!Ppxlib.Deriving.Args.t}[Deriving.Args.t]} value is to start with the value describing an +empty set of arguments, {{!Ppxlib.Deriving.Args.empty}[Deriving.Args.empty]}. Then add the arguments one by one, using +the combinator {{!Ppxlib.Deriving.Args.(+>)}[Deriving.Args.(+>)]}. Each argument is created using either {{!Ppxlib.Deriving.Args.arg}[Deriving.Args.arg]} +for optional arguments (with value extracted using {{!Ppxlib.Ast_pattern}[Ast_pattern]}) or +{{!Ppxlib.Deriving.Args.flag}[Deriving.Args.flag]} for optional arguments without values. + +{@ocaml[ + # let args () = Deriving.Args.(empty +> arg "option1" (eint __) +> flag "flag") ;; + val args : (int option -> bool -> 'a, 'a) Deriving.Args.t = +]} + +{4 Derivers Dependency} + +[ppxlib] allows declaring that a deriver depends on the previous application of +another deriver. This is expressed simply as a list of derivers. 
For instance, +the {{:https://github.com/janestreet/ppx_csv_conv}csv} deriver depends on the +{{:https://github.com/janestreet/ppx_fields_conv}fields} deriver to run +first. + +{@ocaml[ + # let deps = [] ;; + val deps : 'a list = [] +]} + +In this example, we do not include any dependency. + +{3 Generator Function} + +Similarly to an extender's [expand] function, the function generating new +code in derivers also takes a context and the arguments extracted from the +attribute payload. Here again, the body of the example function can be +safely ignored, as it relies on {{!"generating-code"}later chapters}. + +{@ocaml[ + # let generate_impl ~ctxt _ast option1 flag = + let return s = (* See "Generating code" chapter *) + let loc = Expansion_context.Deriver.derived_item_loc ctxt in + [ Ast_builder.Default.(pstr_eval ~loc (estring ~loc s) []) ] + in + if flag then return "flag is on" + else + match option1 with + | Some i -> return (Printf.sprintf "option is %d" i) + | None -> return "flag and option are not set" ;; + val generate_impl : + ctxt:Expansion_context.Deriver.t -> + 'a -> int option -> bool -> structure_item list = +]} + +Similarly to extenders, there is an additional argument to the function (used +in the example only to compute the location): the context. This time, the context +is of type {{!Ppxlib.Expansion_context.Deriver.t}[Expansion_context.Deriver.t]} and includes: + +- The location of the derived item +- Whether the code generation will be inlined (see {!"inlining-transformations"}) +- The tool that called the rewriting ([merlin], [ocamlc], [ocaml], + [ocamlopt], etc.), +- The name of the input file given to the driver (see + {{!Ppxlib.Expansion_context.Base.input_name}[Expansion_context.Base.input_name]}) +- The [code_path] (see {{!Ppxlib.Expansion_context.Base.code_path}[Expansion_context.Base.code_path]} and + {{!Ppxlib.Code_path}[Code_path]}).
+ +{3 Registering a Deriver} + + +Once the generator function is defined, we can combine the argument extraction +and the generator function to create a +{{!Ppxlib.Deriving.Generator.t}[Deriving.Generator.t]}: + +{@ocaml[ + # let generator () = Deriving.Generator.V2.make (args()) generate_impl ;; + val generator : unit -> (structure_item list, 'a) Deriving.Generator.t = +]} + +This generator can then be registered as a deriver through the {{!Ppxlib.Deriving.add}[Deriving.add]} +function. Note that {{!Ppxlib.Deriving.add}[Deriving.add]} will call {{!Ppxlib.Driver.register_transformation}[Driver.register_transformation]} +itself, so you won't need to do it manually. Adding a deriver is done in a way +that no two derivers with the same name can be registered. This includes derivers +registered through the {{:https://github.com/ocaml-ppx/ppx_deriving}ppx_deriving} library. + +{@ocaml[ + # let my_deriver = Deriving.add "my_deriver" ~str_type_decl:(generator()) ;; + val my_deriver : Deriving.t = +]} + +The different optional named arguments allow registering generators to be +applied in different contexts and in one function call. Remember that you +can only add one deriver with a given name, even if applied on different +contexts. As the API shows, derivers are restricted to being applied in the following contexts: + +- Type declarations ([type t = Foo of int]) +- Type extensions ([type t += Foo of int]) +- Exceptions ([exception E of int]) +- Module type declarations ([module type T = sig end]) + +in both structures and signatures. + +{2 Constant Rewriting} + +OCaml integrates a +{{:https://v2.ocaml.org/manual/extensionsyntax.html#ss:extension-literals}syntax} +to define special constants. Any [g..z] or [G..Z] suffix appended after a float +or int is accepted by the parser (but refused later by the compiler). This means +a PPX must rewrite them.
+ +[ppxlib] provides the {{!Ppxlib.Context_free.Rule.constant}[Context_free.Rule.constant]} function to rewrite those literal +constants. The character (between [g] and [z] or [G] and [Z]) has to be +provided, as well as the constant kind (float or int), and both the location and +the literal as a string will be passed to a rewriting function: + +{@ocaml[ + # let kind = Context_free.Rule.Constant_kind.Integer ;; + val kind : Context_free.Rule.Constant_kind.t = + Ppxlib.Context_free.Rule.Constant_kind.Integer + # let rewriter loc s = Ast_builder.Default.eint ~loc (int_of_string s * 100) ;; + val rewriter : location -> string -> expression = + # let rule = Context_free.Rule.constant kind 'g' rewriter ;; + val rule : Context_free.Rule.t = + # Driver.register_transformation ~rules:[ rule ] "constant" ;; + - : unit = () +]} + +As an example with the above transformation, [let x = 2g + 3g] will be +rewritten to [let x = 200 + 300]. + +{2 Special Functions} + +[ppxlib] supports registering functions to be applied at compile time. A registered identifier [f_macro] will trigger rewriting in two situations: +{ol +{li When it plays the role of the function in a function application} +{li Anywhere it appears in an expression} +} +For instance, in +{@ocaml[ + let _ = (f_macro arg1 arg2, f_macro) +]} + +the rewriting will be triggered once for the left-hand side [f_macro arg1 arg2] +and once for the right-hand side [f_macro]. It is the expansion function that +is responsible for distinguishing between the two cases: using pattern-matching to +distinguish between a function application in one case and a single identifier in +the other. + +In order to register a special function, one needs to use {{!Ppxlib.Context_free.Rule.special_function}[Context_free.Rule.special_function]}, indicating the name of the special function and the rewriter.
The rewriter will take the expression (without expansion context) and should output an [expression option], where: + +- [None] signifies that no rewriting should be done: the top-down pass can + continue (potentially inside the expression). +- [Some expr] signifies the original expression should be replaced by [expr]. The + top-down pass continues with [expr]. + +The difference between [fun expr -> None] and [fun expr -> Some expr] is that +the former will continue the top-down pass {e inside} [expr], while the latter +will continue the top-down pass from [expr] (included), therefore starting an +infinite loop. + +{@ocaml[ + # let expand e = + let return n = Some (Ast_builder.Default.eint ~loc:e.pexp_loc n) in + match e.pexp_desc with + | Pexp_apply (_, arg_list) -> return (List.length arg_list) + | _ -> return 0 + ;; + val expand : expression -> expression option = + # let rule = Context_free.Rule.special_function "n_args" expand ;; + val rule : Context_free.Rule.t = + # Driver.register_transformation ~rules:[ rule ] "special_function_demo" ;; + - : unit = () +]} + +With such a rewriter registered: +{[ + # Printf.printf "n_args is applied with %d arguments\n" (n_args ignored "arguments");; + n_args is applied with 2 arguments + - : unit = () +]} + +{1:global_transformation Global transformation} + +Global transformations are the most general kind of transformation. As such, they allow doing virtually any modifications, but this comes with several drawbacks. There are very few PPXs that really need this powerful but dangerous feature. In fact, even if, at first sight, it seems like your transformation isn't context-free, it's likely that you can find a more suitable abstraction with which it becomes context-free. Whenever that's the case, go for context-free! The mentioned drawbacks are: + +- It is harder for the user to know exactly what parts of the AST will be + changed. Your transformation becomes a scary black box.
+- It is harder for [ppxlib] to combine several global transformations, as there is no + guarantee that the effect of one will work well with the effect of another. +- The job done by two global transformations (e.g., an AST traverse) cannot be + factorised, resulting in slower compilation time. +- If you don't make sure that you really follow all {{!"good-practices"}good practices}, you might end up messing up the global developer experience. + +For all these reasons, a global transformation should be avoided whenever a +context-free transformation could do the job, which by experience seems to be most of the time. +The API for defining a global transformation is easy. A global transformation consists simply of the function and can be directly registered with {{!Ppxlib.Driver.register_transformation}[Driver.register_transformation]}. + +{@ocaml[ + # let f str = List.filter (fun _ -> Random.bool ()) str;; (* Randomly omit structure items *) + val f : 'a list -> 'a list = + # Driver.register_transformation ~impl:f "absent_minded_transformation" ;; + - : unit = () +]} + +{1 Inlining Transformations} + +When using a PPX, the transformation happens at compile time, and the produced code could be directly inlined into the original code. This allows dropping the dependency on [ppxlib] and the PPX used to generate the code. + +This mechanism is implemented for derivers implemented in [ppxlib] and is convenient to use, especially in conjunction with Dune. When applying a deriver, using [[@@deriving_inline deriver_name]] will apply the inline mode of [deriver_name] instead of the normal mode. + +Inline derivers will generate a [.corrected] version of the file that Dune can use to promote your file.
For more information on how to use this feature to remove a dependency on [ppxlib] and a specific PPX from your project, refer to {{:https://ocaml.org/docs/metaprogramming#dropping-ppxs-dependency-with-derivinginline}this guide}. + +{1 Integration with Dune} + +If your PPX is written as a Dune project, you'll need to specify the [kind] +field in your [dune] file with one of the following two values: + +- [ppx_rewriter], or +- [ppx_deriver]. + +If your transformation is anything but a deriver (e.g. an extension node rewriter), use [ppx_rewriter]. If your transformation is a deriver, then the TLDR workflow is: use [ppx_deriver] and furthermore add [ppx_deriving] to your dependencies, i.e. to the [libraries] field of your dune file. In fact, the situation is quite a bit more complex, though: apart from applying the registered transformations, the Ppxlib driver also does several checks. One of those consists in checking the following: whenever the source code contains [\[@@deriving foo (...)\]], then the Ppxlib driver expects a deriver named [foo] to be registered. That's helpful to catch typos and missing dependencies on derivers and is certainly more hygienic than silently ignoring the annotation. However, for that check to work, the registered derivers must be grouped together into one process, i.e. a driver. UTop cannot use a static driver such as the Ppxlib one because dependencies are added dynamically to a UTop session. So the solution is the following: if you use [ppx_deriver] in your [kind] field, dune will add the right data to your PPX's META file to ensure that UTop will use the [ppx_deriving] driver, which links the derivers dynamically. As a result, [ppx_deriving] appears as a dependency in the META file. Therefore, whenever a user uses [ocamlfind] (e.g. by using UTop), they will hit a "[ppx_deriving] not found" error, unless you define [ppx_deriving] in your dependencies.
So, long story short: if you strongly care about avoiding [ppx_deriving] as a dependency, use [ppx_rewriter] in your [kind] field and be aware of the fact that users won't be able to try your deriver in UTop; otherwise do the TLDR workflow. + +Here is a minimal Dune stanza for a rewriter: + +{@dune[ + (library + (public_name my_ppx_rewriter) + (kind ppx_rewriter) + (libraries ppxlib)) +]} + +The public name you chose is the name your users will refer to your PPX in +the [preprocess] field. For example, to use this PPX rewriter, one would add the +[(preprocess (pps my_ppx_rewriter))] to their [library] or [executable] stanza. + +{1:generatingcode Defining AST Transformations} + +In this chapter, we only focused on the [ppxlib] ceremony to declare all kinds +of transformations. However, we did not cover how to write the actual +generative function, the backbone of the transformation. [ppxlib] provides several +modules to help with code generation and matching, which are covered in more +depth in the next chapters of this documentation: + +- {{!Ppxlib.Ast_traverse}[Ast_traverse]}, which helps in defining AST traversals, such as maps, folds, + iter, etc. +- {{!Ppxlib.Ast_helper}[Ast_helper]} and {{!Ppxlib.Ast_builder}[Ast_builder]}, for generating AST nodes in a simpler way than + directly dealing with the {{!Ppxlib.Parsetree}[Parsetree]} types, providing a more stable API. +- {{!Ppxlib.Ast_pattern}[Ast_pattern]}, the sibling of {{!Ppxlib.Ast_builder}[Ast_builder]} for matching on AST nodes, + extracting values for them. +- {!Ppxlib_metaquot}, a PPX to manipulate code more simply by quoting and unquoting + code. + +This documentation also includes some {{!"good-practices"}guidelines} on how to generate nice code. 
+We encourage you to read and follow it to produce high quality PPXs: + +- A section on good {{!page-"good-practices"."handling_errors"}error reporting} +- A section on the {{!page-"good-practices"."quoting"}quoting mechanism} +- A section on how to {{!page-"good-practices"."testing-your-ppx"}test} your PPX +- A section on how to collaborate with Merlin effectively by being careful with {{!page-"good-practices"."resp_loc"}locations} + +{%html:
%}{{!"driver"}< The Driver}{%html:
%}{{!"generating-code"}Generating AST nodes >}{%html:
%} diff --git a/src/ast_builder.mli b/src/ast_builder.mli index b1d4d518..4df5368d 100644 --- a/src/ast_builder.mli +++ b/src/ast_builder.mli @@ -1,81 +1,15 @@ -(** Helpers for build OCaml AST fragments *) +(** [Ast_builder] is a module to generate OCaml AST fragments. It provides a + shorter syntax than directly using the {!Parsetree} constructors, as well as + a better stability than the constructors. *) open! Import -(** This module is similar to the [Ast_helper] module distributed with OCaml but - uses different conventions. - - {3 Locations} - - [Ast_helper] uses a global variable for the default locations, we found that - to it makes it quite easy to mess up locations. Instead this modules forces - you to provide a location argument. - - For building fragment using the same location everywhere, a functor is - provided. - - {3 Naming} - - The names match the [Parsetree] names closely, which makes it easy to build - AST fragments by just knowing the [Parsetree]. - - For types of the form a wrapper record with a [_desc] field, helpers are - generated for each constructor constructing the record directly. For - instance for the type [Parsetree.expression]: - - {[ - type expression = - { pexp_desc : expression_desc - ; pexp_loc : Location.t - ; pexp_attributes : attributes - } - - and expression_desc = - | Pexp_ident of Longident.t loc - | Pexp_constant of constant - | Pexp_let of rec_flag * value_binding list * expression - ... - ]} - - The following helpers are created: - - {[ - val pexp_ident : loc:Location.t -> Longident.t Located.t -> expression - val pexp_constant : loc:Location.t -> constant -> expression - val pexp_let : loc:Location.t -> rec_flag -> value_binding list -> expression - ... 
- ]} - - For other record types, such as type_declaration, we have the following - helper: - - {[ - type type_declaration = - { ptype_name : string Located.t - ; ptype_params : (core_type * variance) list - ; ptype_cstrs : (core_type * core_type * Location.t) list - ; ptype_kind : type_kind - ; ptype_private : private_flag - ; ptype_manifest : core_type option - ; ptype_attributes : attributes - ; ptype_loc : Location.t - } - - - val type_declaration - : loc : Location.t - -> name : string Located.t - -> params : (core_type * variance) list - -> cstrs : (core_type * core_type * Location.t) list - -> kind : type_kind - -> private : private_flag - -> manifest : core_type option - -> type_declaration - ]} - - Attributes are always set to the empty list. If you want to set them you - have to override the field with the [{ e with pexp_attributes = ... }] - notation. *) +(** {1 Link to the tutorial} + + For a detailed explanation on this module, refer to the + {{!"generating-code".ast_builder} relevant} part of the manual. + + {1 API} *) (** Helpers taking a [~loc] argument. This module is meant to be opened or aliased. *) diff --git a/src/ast_pattern.mli b/src/ast_pattern.mli index 9c609f67..20783031 100644 --- a/src/ast_pattern.mli +++ b/src/ast_pattern.mli @@ -1,102 +1,16 @@ -(** First class AST patterns *) +(** This module implements first class AST patterns. It allows to destruct and + extract values from AST fragments. This gives the same functionality as a + pattern-match, but with simpler syntax and more stability than directly + pattern-matching on the {!Parsetree} constructors. *) open! Import -(** PPX rewriters often need to recognize fragments the OCaml AST, for instance - to parse the payload of an attribute/expression. You can do that with a - pattern matching and manual error reporting when the input is not what you - expect but this has proven to quickly become extremely verbose and - unreadable. 
+(** {1 Link to the tutorial} - This module aims to help with that by providing first class AST patterns. + For a detailed explanation on this module, refer to the + {{!"matching-code".ast_pattern_intro} relevant} part of the manual. - To understand how to use it, let's consider the example of ppx_inline_test. - We want to recognize patterns of the form: - - {[ - let%test "name" = expr - ]} - - Which is a syntactic sugar for: - - {[ - [%%test let "name" = expr] - ]} - - If we wanted to write a function that recognizes the payload of [%%test] - using normal pattern matching we would write: - - {[ - let match_payload = function - | PStr - [ - { - pstr_desc = - Pstr_value - ( Nonrecursive, - [ - { - pvb_pat = - { - ppat_desc = - Ppat_constant (Pconst_string (name, _, None)); - _; - }; - pvb_expr = e; - _; - }; - ] ); - _; - }; - ] -> - (name, e) - | _ -> Location.raise_errorf "" - ]} - - This is quite cumbersome, and this is still not right: this function drops - all attributes without notice. - - Now let's imagine we wanted to construct the payload instead, using - [Ast_builder] one would write: - - {[ - let build_payload ~loc name expr = - let (module B) = Ast_builder.make loc in - let open B in - Parsetree.PStr - [ - pstr_value Nonrecursive [ value_binding ~pat:(pstring name) ~expr ]; - ] - ]} - - Constructing a first class pattern is almost as simple as replacing - [Ast_builder] by [Ast_pattern]: - - {[ - let payload_pattern () = - let open Ast_pattern in - pstr - (pstr_value nonrecursive - (value_binding ~pat:(pstring __) ~expr:__ ^:: nil) - ^:: nil) - ]} - - Notice that the place-holders for [name] and [expr] have been replaced by - [__]. An extra unit argument appears because of value restriction. The - function above would create a pattern with type: - - {[ - (payload, string -> expression -> 'a, 'a) Ast_pattern.t - ]} - - which means that it matches values of type [payload] and captures a string - and expression from it. 
The two captured elements comes from the use of - [__]. - - An empty payload (e.g. for an attribute that has no payload) is matched by - [Ast_pattern.(pstr nil)]. A payload with exactly one expression (e.g. to - specify a custom function in a deriver) is matched by - [Ast_pattern.(single_expr_payload __)]. *) + {1 API} *) type ('a, 'b, 'c) t = ('a, 'b, 'c) Ast_pattern0.t (** Type of a pattern: diff --git a/src/ast_traverse.mli b/src/ast_traverse.mli index 23526d22..e6c7a23e 100644 --- a/src/ast_traverse.mli +++ b/src/ast_traverse.mli @@ -1,4 +1,12 @@ -(** AST traversal classes *) +(** This module provides AST traversal classes, such as maps, iterations, folds, + etc. on the {!Parsetree} types. + + {1 Link to the tutorial} + + For a detailed explanation on this module, refer to the {{!"ast-traversal"} + relevant} part of the manual. + + {1 API} *) open! Import diff --git a/src/attribute.mli b/src/attribute.mli index 5cbd1a1a..1007d102 100644 --- a/src/attribute.mli +++ b/src/attribute.mli @@ -1,5 +1,3 @@ -(** Attribute hygiene *) - (** This module provides hygiene for attributes. The goal is to report misuses of attributes to the user as soon as possible so that no mistyped attribute get silently ignored. *) diff --git a/src/caller_id.ml b/src/caller_id.ml index f9efd0a9..0c354eb8 100644 --- a/src/caller_id.ml +++ b/src/caller_id.ml @@ -1,8 +1,8 @@ +(** Small helper to find out who is the caller of a function *) + open! Import module Printexc = Caml.Printexc -(* Small helper to find out who is the caller of a function *) - type t = Printexc.location option let get ~skip = diff --git a/src/code_path.mli b/src/code_path.mli index 7fdb56ca..f269e9e8 100644 --- a/src/code_path.mli +++ b/src/code_path.mli @@ -1,3 +1,6 @@ +(** This module contains type and functions for representing and manipulating + path to AST nodes. *) + open! 
Import type t diff --git a/src/common.mli b/src/common.mli index 00d0fccb..765adf5a 100644 --- a/src/common.mli +++ b/src/common.mli @@ -1,3 +1,5 @@ +(** Common functions *) + open! Import val lident : string -> Longident.t diff --git a/src/context_free.mli b/src/context_free.mli index 2d677db8..1979ceb8 100644 --- a/src/context_free.mli +++ b/src/context_free.mli @@ -1,4 +1,5 @@ -(** Context free rewriting *) +(** Context free rewriting, to define local rewriting rules that will all be + applied at once by the driver. *) open! Import diff --git a/src/deriving.mli b/src/deriving.mli index e1377873..14f7d11b 100644 --- a/src/deriving.mli +++ b/src/deriving.mli @@ -1,4 +1,4 @@ -(** Deriving code from type declarations *) +(** Deriving code from type declarations. *) open Import diff --git a/src/driver.mli b/src/driver.mli index 929a872d..cdc7061d 100644 --- a/src/driver.mli +++ b/src/driver.mli @@ -1,3 +1,9 @@ +(** Interaction with the driver, such as getting/setting cookies, adding + arguments. + + The relevant part in the manual is {{!driver.driver_execution} the section + on its execution}. *) + open Import val add_arg : Caml.Arg.key -> Caml.Arg.spec -> doc:string -> unit @@ -87,25 +93,11 @@ val register_transformation : unit (** [register_transformation name] registers a code transformation. - [name] is a logical name for the transformation (such as [sexp_conv] or - [bin_prot]). It is mostly used for debugging purposes. + [name] is a logical name for the set of transformations (such as [sexp_conv] + or [bin_prot]). It is mostly used for debugging purposes. [rules] is a list of context independent rewriting rules, such as extension - point expanders. This is what most code transformation should use. Rules - from all registered transformations are all applied at the same time, before - any other transformations. Moreover they are applied in a top-down manner, - giving more control to extensions on how they interpret their payload.
- - For instance: - - - some extensions capture a pretty-print of the payload in their expansion - and using top-down ensures that the payload is as close as possible to the - original code - - some extensions process other extension in a special way inside their - payload. For instance [%here] (from ppx_here) will normally expand to a - record of type [Lexing.position]. However when used inside [%sexp] (from - ppx_sexp_value) it will expand to the human-readable sexp representation - of a source code position. + point expanders. This is what most code transformation should use. [extensions] is a special cases of [rules] and is deprecated. It is only kept for backward compatibility. @@ -123,11 +115,6 @@ val register_transformation : when the other mechanism are not enough. For instance if the transformation expands extension points that depend on the context. - If no rewriter is using [impl] and [intf], then the whole transformation is - completely independent of the order in which the various rewriter are - specified. Moreover the resulting driver will be faster as it will do only - one pass (excluding safety checks) on the whole AST. - [lint_impl] and [lint_intf] are applied to the unprocessed source. Errors they return will be reported to the user as preprocessor warnings. @@ -136,17 +123,8 @@ val register_transformation : [impl] is that you can specify if it should be applied before or after all rewriters defined through [rules], [impl] or [intf] are applied. 
- Rewritings are applied in the following order: - - - linters ([lint_impl], [lint_intf]) - - preprocessing ([preprocess_impl], [preprocess_intf]) - - "before" instrumentations ([instrument], where instrument = - [Instrument.make ~position:Before (...)]) - - context-independent rules ([rules], [extensions]) - - non-instrumentation whole-file transformations ([impl], [intf], - [enclose_impl], [enclose_intf]) - - "after" instrumentations ([instrument], where instrument = - [Instrument.make ~position:After (...)]) *) + More information on each phase, and their relative order, can be found in + the {{!driver.driver_execution} manual}. *) val register_transformation_using_ocaml_current_ast : ?impl: diff --git a/src/expansion_context.mli b/src/expansion_context.mli index fb770d7f..78bc41cd 100644 --- a/src/expansion_context.mli +++ b/src/expansion_context.mli @@ -1,3 +1,5 @@ +(** The context given to rewriting rules when expanding. *) + module Base : sig type t (** Type for the location independent parts of the expansion context *) diff --git a/src/expansion_helpers.ml b/src/expansion_helpers.ml index 6fe74bb7..a30aa7b3 100644 --- a/src/expansion_helpers.ml +++ b/src/expansion_helpers.ml @@ -21,3 +21,5 @@ let mangle_lid ?fixpoint affix lid = | Lident s -> Lident (mangle ?fixpoint affix s) | Ldot (p, s) -> Ldot (p, mangle ?fixpoint affix s) | Lapply _ -> invalid_arg "Ppxlib.Expansion_helpers.mangle_lid: Lapply" + +module Quoter = Quoter diff --git a/src/expansion_helpers.mli b/src/expansion_helpers.mli index 4b899e89..7b266089 100644 --- a/src/expansion_helpers.mli +++ b/src/expansion_helpers.mli @@ -1,10 +1,13 @@ -(** Various helpers for expansion. *) +(** Various helpers for expansion, such as quoting expressions in their context, + or mangling names. *) open Import (** {2 Mangling} *) -(** Derive mangled names from type names in a deriver. *) +(** Derive mangled names from type names in a deriver. 
For instance, the [t] can + be turned into [t_of_yojson] or [yojson_of_t] with the functions from this + module. *) (** Specification for name mangling. *) type affix = @@ -25,3 +28,7 @@ val mangle_type_decl : ?fixpoint:string -> affix -> type_declaration -> string val mangle_lid : ?fixpoint:string -> affix -> Longident.t -> Longident.t (** [mangle_lid ~fixpoint affix lid] does the same as {!mangle}, but for the last component of [lid]. *) + +(** {2 Quoting} *) + +module Quoter = Quoter diff --git a/src/extension.mli b/src/extension.mli index e31e9f39..41c7235a 100644 --- a/src/extension.mli +++ b/src/extension.mli @@ -1,3 +1,5 @@ +(** Declare extenders to rewrite extension nodes. *) + open! Import type (_, _) equality = Eq : ('a, 'a) equality | Ne : (_, _) equality diff --git a/src/keyword.mli b/src/keyword.mli index 36c490f6..0423be04 100644 --- a/src/keyword.mli +++ b/src/keyword.mli @@ -1,2 +1,4 @@ +(** Small module to check if a string is an OCaml keyword. *) + val is_keyword : string -> bool (** Check if a string is an OCaml keyword. *) diff --git a/src/location.mli b/src/location.mli index f87b8e04..642b9ab7 100644 --- a/src/location.mli +++ b/src/location.mli @@ -1,7 +1,10 @@ -(** Overrides the Location module of OCaml *) +(** Overrides the [Location] module of OCaml *) (** There are less functions in this module. However the API should be more - stable than the Location module of OCaml. *) + stable than the Location module of OCaml. + + For a detailed presentation of good practices regarding locations, refer to + the {{!"good-practices".resp_loc} section} in the manual. *) open! Import @@ -26,7 +29,8 @@ val init : Lexing.lexbuf -> string -> unit val raise_errorf : ?loc:t -> ('a, Caml.Format.formatter, unit, 'b) format4 -> 'a (** Raise a located error. Should be avoided as much as possible, in favor of - {!error_extensionf}. *) + {!error_extensionf}. See the {{!"good-practices".handling_errors} relevant} + part of the tutorial. 
*) val of_lexbuf : Lexing.lexbuf -> t (** Return the location corresponding to the last matched regular expression *) @@ -46,6 +50,10 @@ val compare : t -> t -> int module Error : sig type location = t + + (** For a detailed explanation on error reporting, refer to the + {{!"good-practices".handling_errors} relevant} part of the tutorial.*) + type t val make : loc:location -> string -> sub:(location * string) list -> t @@ -82,6 +90,9 @@ with type location := t val error_extensionf : loc:t -> ('a, Format.formatter, unit, extension) format4 -> 'a (** Returns an error extension node. When encountered in the AST, the compiler - recognizes it and displays the error properly. *) + recognizes it and displays the error properly. + + For a detailed explanation on error reporting, refer to the + {{!"good-practices".handling_errors} relevant} part of the tutorial. *) exception Error of Error.t diff --git a/src/longident.mli b/src/longident.mli index 038145ee..ad02a56f 100644 --- a/src/longident.mli +++ b/src/longident.mli @@ -1,4 +1,4 @@ -(** Overrides the Longident module of OCaml *) +(** Overrides the [Longident] module of OCaml *) open! Import diff --git a/src/merlin_helpers.mli b/src/merlin_helpers.mli index 72eee66a..bb6a942f 100644 --- a/src/merlin_helpers.mli +++ b/src/merlin_helpers.mli @@ -1,5 +1,8 @@ (** Some helpers to annotate the AST so merlin can decide which branches to look - at and which branches to ignore. *) + at and which branches to ignore. + + More information can be found in the section of the manual about + {{!"good-practices".resp_loc} locations}. *) open! Import diff --git a/src/name.mli b/src/name.mli index d582e57a..d8b5c25a 100644 --- a/src/name.mli +++ b/src/name.mli @@ -98,6 +98,8 @@ module Allowlisted : sig end module Reserved_namespaces : sig + (** Small module to reserve namespaces in attribute names. 
*) + val reserve : string -> unit (** [reserve "foo"] has two implications: diff --git a/src/ppxlib.ml b/src/ppxlib.ml index e8249986..d354da57 100644 --- a/src/ppxlib.ml +++ b/src/ppxlib.ml @@ -1,5 +1,79 @@ (** Standard library for ppx rewriters *) +(** [ppxlib] is meant to be opened globally in your PPX source files. + + Opening it comes with two advantages. First, it will shadow the + [compiler-libs] modules. The [compiler-libs] modules are unstable and aren't + meant to be used, so shadowing them is a good protection mechanism. In case + you don't want to open [Ppxlib], you can open [Ocaml_shadow] to get the same + protection. Second, it will bring several modules in scope, that are useful + to have when writing a rewriter: + + - The main [ppxlib] modules, such as modules to help manipulate the AST + ({!Ast_builder}, {!Ast_pattern}), and a few functions. + - Modules from other libraries, such as {!Ast_helper} or {!Pprintast}, + - The whole AST types (by [including] the {!Ast} module). + + {1 The core [ppxlib] entries} *) + +(** {2 Manipulating the AST} *) + +module Ast_builder = Ast_builder +module Ast_pattern = Ast_pattern +module Ast_traverse = Ast_traverse + +(** {2 Context-free rewriting} *) + +module Context_free = Context_free +module Deriving = Deriving +module Extension = Extension +module Expansion_context = Expansion_context +module Code_path = Code_path + +(** {2 Other helpers} *) + +module Expansion_helpers = Expansion_helpers +module Merlin_helpers = Merlin_helpers +module Spellcheck = Spellcheck +module Keyword = Keyword + +(** {2 Driver-related modules} *) + +module Driver = Driver +module Caller_id = Caller_id +module Ast_io = Utils.Ast_io.Read_bin + +(** {2 Checks} *) + +module Attribute = Attribute +module Reserved_namespaces = Name.Reserved_namespaces + +(** {2 Common helper functions} *) + +include Common + +(** {1 Modules from other libraries} + + Expose some modules from {!Ppxlib_ast}. 
*) + +module Ast = Ppxlib_ast.Ast +module Ast_helper = Ppxlib_ast.Ast_helper +module Asttypes = Ppxlib_ast.Asttypes +module Parse = Ppxlib_ast.Parse +module Parsetree = Ppxlib_ast.Parsetree +module Pprintast = Ppxlib_ast.Pprintast +module Selected_ast = Ppxlib_ast.Selected_ast +module Location = Location +module Longident = Longident +module Loc = Loc + +(** {1 The whole AST types} *) + +include Ast +(** Include all the Ast definitions since we need them in every single ppx + + @closed *) + (** Make sure code using Ppxlib doesn't refer to compiler-libs without being explicit about it: @@ -27,47 +101,6 @@ include struct with module Syntaxerr := Syntaxerr) end -(** Expose some modules from {!Ppxlib_ast}; in particular, overwrite some of the - modules above *) - -module Ast = Ppxlib_ast.Ast -module Ast_helper = Ppxlib_ast.Ast_helper -module Asttypes = Ppxlib_ast.Asttypes -module Parse = Ppxlib_ast.Parse -module Parsetree = Ppxlib_ast.Parsetree -module Pprintast = Ppxlib_ast.Pprintast -module Selected_ast = Ppxlib_ast.Selected_ast - -include Ast -(** Include all the Ast definitions since we need them in every single ppx - - @closed *) - -module Ast_builder = Ast_builder -module Ast_pattern = Ast_pattern -module Ast_traverse = Ast_traverse -module Attribute = Attribute -module Code_path = Code_path -module Caller_id = Caller_id -module Context_free = Context_free -module Deriving = Deriving -module Driver = Driver -module Expansion_context = Expansion_context -module Expansion_helpers = Expansion_helpers -module Extension = Extension -module Keyword = Keyword -module Loc = Loc -module Location = Location -module Longident = Longident -module Merlin_helpers = Merlin_helpers -module Reserved_namespaces = Name.Reserved_namespaces -module Spellcheck = Spellcheck -module Quoter = Quoter -module Ast_io = Utils.Ast_io.Read_bin - -include Common -(** @closed *) - (**/**) (* For tests and Ppx_core compatibility layer *) diff --git a/src/quoter.mli b/src/quoter.mli index 
9f18d050..2c8baf05 100644 --- a/src/quoter.mli +++ b/src/quoter.mli @@ -3,7 +3,10 @@ The idea is that whenever we want to refer to an expression in generated code we first quote it. The result will be an identifier that is guaranteed to refer to the expression it was created from. This way it is impossible - for quoted fragments to refer to newly introduced expressions. *) + for quoted fragments to refer to newly introduced expressions. + + For more information, see the {{!"good-practices".quoting} section on + quoting} in the good practices section. *) open Import diff --git a/src/spellcheck.ml b/src/spellcheck.ml index b766e17e..81242883 100644 --- a/src/spellcheck.ml +++ b/src/spellcheck.ml @@ -1,3 +1,5 @@ +(** Helpers to provide hints to PPX users for typos or spellchecks. *) + open! Import exception Cutoff_met diff --git a/src/utils.mli b/src/utils.mli index 5c50581d..88157dcc 100644 --- a/src/utils.mli +++ b/src/utils.mli @@ -42,6 +42,8 @@ module Ast_io : sig val write : out_channel -> t -> add_ppx_context:bool -> unit module Read_bin : sig + (** A small module to help read [bin-annots] generated files. *) + type ast = Intf of signature | Impl of structure type t diff --git a/test/quoter/test.ml b/test/quoter/test.ml index 0d6921cd..33ca4cb5 100644 --- a/test/quoter/test.ml +++ b/test/quoter/test.ml @@ -1,6 +1,7 @@ #require "base";; open Ppxlib +open Expansion_helpers module Ast = Ast_builder.Default [%%expect{|