From 3da6102a4256974cacb42a34443c0f8a01458af4 Mon Sep 17 00:00:00 2001 From: Marius Conjeaud Date: Fri, 25 Oct 2024 16:05:55 +0200 Subject: [PATCH] Add Advanced query operations ; add interlinks --- doc/source/advanced_query_operations.rst | 103 +++++++++ doc/source/cypher.rst | 12 ++ doc/source/filtering_ordering.rst | 2 +- doc/source/getting_started.rst | 54 ++--- doc/source/index.rst | 4 +- doc/source/queries.rst | 258 ----------------------- doc/source/traversal.rst | 4 +- 7 files changed, 144 insertions(+), 293 deletions(-) create mode 100644 doc/source/advanced_query_operations.rst delete mode 100644 doc/source/queries.rst diff --git a/doc/source/advanced_query_operations.rst b/doc/source/advanced_query_operations.rst new file mode 100644 index 00000000..16c93427 --- /dev/null +++ b/doc/source/advanced_query_operations.rst @@ -0,0 +1,103 @@ +.. _Advanced query operations: + +========================= +Advanced query operations +========================= + +neomodel provides ways to enhance your queries beyond filtering and traversals. + +Annotate - Aliasing +------------------- + +The `annotate` method allows you to add transformations to your elements. To learn more about the available transformations, keep reading this section. + +Aggregations +------------ + +neomodel implements some of the aggregation methods available in Cypher: + +- Collect +- Last + +These are usable in this way:: + + from neomodel.sync_match import Collect, Last + + # distinct is optional, and defaults to False. When true, objects are deduplicated + Supplier.nodes.traverse_relations(available_species="coffees__species") + .annotate(Collect("available_species", distinct=True)) + .all() + + # Last is used to get the last element of a list + Supplier.nodes.traverse_relations(available_species="coffees__species") + .annotate(Last(Collect("last_species"))) + .all() + +.. 
note:: + Using the Last() method right after a Collect() without having set an ordering will return the last element in the list as it was returned by the database. + + This is probably not what you want ; which means you must provide an explicit ordering. To do so, you cannot use neomodel's order_by method, but need an intermediate transformation step (see below). + + This is because the order_by method adds ordering as the very last step of the Cypher query ; whereas in the present example, you want to first order Species, then get the last one, and then finally return your results. In other words, you need an intermediate WITH Cypher clause. + +Intermediate transformations +---------------------------- + +The `intermediate_transform` method basically allows you to add a WITH clause to your query. This is useful when you need to perform some operations on your results before returning them. + +As discussed in the note above, this is for example useful when you need to order your results before applying an aggregation method, like so:: + + from neomodel.sync_match import Collect, Last + + # This will return all Coffee nodes, with their most expensive supplier + Coffee.nodes.traverse_relations(suppliers="suppliers") + .intermediate_transform( + {"suppliers": "suppliers"}, ordering=["suppliers.delivery_cost"] + ) + .annotate(supps=Last(Collect("suppliers"))) + +Subqueries +---------- + +The `subquery` method allows you to perform a `Cypher subquery <https://neo4j.com/docs/cypher-manual/current/subqueries/call-subquery/>`_ inside your query. 
This allows you to perform operations in isolation from the rest of your query:: + + from neomodel.sync_match import Collect, Last + + # This will create a CALL{} subquery + # And return a variable named supps usable in the rest of your query + Coffee.nodes.subquery( + Coffee.nodes.traverse_relations(suppliers="suppliers") + .intermediate_transform( + {"suppliers": "suppliers"}, ordering=["suppliers.delivery_cost"] + ) + .annotate(supps=Last(Collect("suppliers"))), + ["supps"], + ) + +Helpers +------- + +Reading the sections above, you may have noticed that we used explicit aliasing in the examples, as in:: + + traverse_relations(suppliers="suppliers") + +This allows you to reference the generated Cypher variables in your transformation steps, for example:: + + traverse_relations(suppliers="suppliers").annotate(Collect("suppliers")) + +In some cases though, it is not possible to set explicit aliases, for example when using `fetch_relations`. In these cases, neomodel provides `resolver` methods, so you do not have to guess the name of the variable in the generated Cypher. Those are `NodeNameResolver` and `RelationNameResolver`. For example:: + + from neomodel.sync_match import Collect, NodeNameResolver, RelationNameResolver + + Supplier.nodes.fetch_relations("coffees__species") + .annotate( + all_species=Collect(NodeNameResolver("coffees__species"), distinct=True), + all_species_rels=Collect( + RelationNameResolver("coffees__species"), distinct=True + ), + ) + .all() + +.. note:: + + When using the resolvers in combination with a traversal as in the example above, it will resolve the variable name of the last element in the traversal - the Species node for NodeNameResolver, and Coffee--Species relationship for RelationNameResolver. 
\ No newline at end of file diff --git a/doc/source/cypher.rst b/doc/source/cypher.rst index f8c7ccaf..37ebcbf1 100644 --- a/doc/source/cypher.rst +++ b/doc/source/cypher.rst @@ -24,6 +24,18 @@ Outside of a `StructuredNode`:: The ``resolve_objects`` parameter automatically inflates the returned nodes to their defined classes (this is turned **off** by default). See :ref:`automatic_class_resolution` for details and possible pitfalls. +You can also retrieve a whole path of already instantiated objects corresponding to +the nodes and relationship classes with a single query:: + + q = db.cypher_query("MATCH p=(:CityOfResidence)<-[:LIVES_IN]-(:PersonOfInterest)-[:IS_FROM]->(:CountryOfOrigin) RETURN p LIMIT 1", + resolve_objects = True) + +Notice here that ``resolve_objects`` is set to ``True``. This results in ``q`` being a +list of ``result, result_name`` and ``q[0][0][0]`` being a ``NeomodelPath`` object. + +``NeomodelPath`` ``nodes, relationships`` attributes contain already instantiated objects of the +nodes and relationships in the query, *in order of appearance*. + Integrations ============ diff --git a/doc/source/filtering_ordering.rst b/doc/source/filtering_ordering.rst index 3dee395f..3b1873a0 100644 --- a/doc/source/filtering_ordering.rst +++ b/doc/source/filtering_ordering.rst @@ -120,7 +120,7 @@ The `has` method checks for existence of (one or more) relationships, in this ca This can be negated by setting `suppliers=False`, to find `Coffee` nodes without `suppliers`. -You can also filter on the existence of more complex traversals by using the `traverse_relations` method. (ADD LINK TO DOC) +You can also filter on the existence of more complex traversals by using the `traverse_relations` method. See :ref:`Path traversal`. 
Ordering ======== diff --git a/doc/source/getting_started.rst b/doc/source/getting_started.rst index 6e8a5aa0..82a0023b 100644 --- a/doc/source/getting_started.rst +++ b/doc/source/getting_started.rst @@ -193,6 +193,28 @@ simply returning the node IDs rather than every attribute associated with that N # Return set of nodes people = Person.nodes.filter(age__gt=3) +Iteration, slicing and more +--------------------------- + +Iteration, slicing and counting are also supported:: + + # Iterable + for coffee in Coffee.nodes: + print(coffee.name) + + # Sliceable using python slice syntax + coffee = Coffee.nodes.filter(price__gt=2)[2:] + +The slice syntax returns a NodeSet object which can in turn be chained. + +Length and boolean methods do not return NodeSet objects and cannot be chained further:: + + # Count with __len__ + print(len(Coffee.nodes.filter(price__gt=2))) + + if Coffee.nodes: + print("We have coffee nodes!") + Relationships ============= @@ -236,38 +258,6 @@ Working with relationships:: Retrieving additional relations =============================== -To avoid queries multiplication, you have the possibility to retrieve -additional relations with a single call:: - - # The following call will generate one MATCH with traversal per - # item in .fetch_relations() call - results = Person.nodes.fetch_relations('country').all() - for result in results: - print(result[0]) # Person - print(result[1]) # associated Country - -You can traverse more than one hop in your relations using the -following syntax:: - - # Go from person to City then Country - Person.nodes.fetch_relations('city__country').all() - -You can also force the use of an ``OPTIONAL MATCH`` statement using -the following syntax:: - - from neomodel.match import Optional - - results = Person.nodes.fetch_relations(Optional('country')).all() - -.. 
note:: - - Any relationship that you intend to traverse using this method **MUST have a model defined**, even if only the default StructuredRel, like:: - - class Person(StructuredNode): - country = RelationshipTo(Country, 'IS_FROM', model=StructuredRel) - - Otherwise, neomodel will not be able to determine which relationship model to resolve into, and will fail. - .. note:: You can fetch one or more relations within the same call diff --git a/doc/source/index.rst b/doc/source/index.rst index 91a728c0..068e2d93 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -74,7 +74,9 @@ Contents properties spatial_properties schema_management - queries + filtering_ordering + traversal + advanced_query_operations cypher transactions hooks diff --git a/doc/source/queries.rst b/doc/source/queries.rst deleted file mode 100644 index 4c77a791..00000000 --- a/doc/source/queries.rst +++ /dev/null @@ -1,258 +0,0 @@ -================ -Advanced queries -================ - -Neomodel contains an API for querying sets of nodes without having to write cypher:: - - class SupplierRel(StructuredRel): - since = DateTimeProperty(default=datetime.now) - - - class Supplier(StructuredNode): - name = StringProperty() - delivery_cost = IntegerProperty() - coffees = RelationshipTo('Coffee', 'SUPPLIES') - - - class Coffee(StructuredNode): - name = StringProperty(unique_index=True) - price = IntegerProperty() - suppliers = RelationshipFrom(Supplier, 'SUPPLIES', model=SupplierRel) - -Node sets and filtering -======================= - -The ``.nodes`` property of a class returns all nodes of that type from the database. - -This set (or `NodeSet`) can be iterated over and filtered on. 
Under the hood it uses labels introduced in Neo4J 2:: - - # nodes with label Coffee whose price is greater than 2 - Coffee.nodes.filter(price__gt=2) - - try: - java = Coffee.nodes.get(name='Java') - except Coffee.DoesNotExist: - print "Couldn't find coffee 'Java'" - -The filter method borrows the same Django filter format with double underscore prefixed operators: - -- lt - less than -- gt - greater than -- lte - less than or equal to -- gte - greater than or equal to -- ne - not equal -- in - item in list -- isnull - `True` IS NULL, `False` IS NOT NULL -- exact - string equals -- iexact - string equals, case insensitive -- contains - contains string value -- icontains - contains string value, case insensitive -- startswith - starts with string value -- istartswith - starts with string value, case insensitive -- endswith - ends with string value -- iendswith - ends with string value, case insensitive -- regex - matches a regex expression -- iregex - matches a regex expression, case insensitive - -Complex lookups with ``Q`` objects -================================== - -Keyword argument queries -- in `filter`, -etc. -- are "AND"ed together. To execute more complex queries (for -example, queries with ``OR`` statements), `Q objects ` can -be used. - -A `Q object` (``neomodel.Q``) is an object -used to encapsulate a collection of keyword arguments. These keyword arguments -are specified as in "Field lookups" above. - -For example, this ``Q`` object encapsulates a single ``LIKE`` query:: - - from neomodel import Q - Q(name__startswith='Py') - -``Q`` objects can be combined using the ``&`` and ``|`` operators. When an -operator is used on two ``Q`` objects, it yields a new ``Q`` object. 
- -For example, this statement yields a single ``Q`` object that represents the -"OR" of two ``"name__startswith"`` queries:: - - Q(name__startswith='Py') | Q(name__startswith='Jav') - -This is equivalent to the following SQL ``WHERE`` clause:: - - WHERE name STARTS WITH 'Py' OR name STARTS WITH 'Jav' - -Statements of arbitrary complexity can be composed by combining ``Q`` objects -with the ``&`` and ``|`` operators and use parenthetical grouping. Also, ``Q`` -objects can be negated using the ``~`` operator, allowing for combined lookups -that combine both a normal query and a negated (``NOT``) query:: - - Q(name__startswith='Py') | ~Q(year=2005) - -Each lookup function that takes keyword-arguments -(e.g. `filter`, `exclude`, `get`) can also be passed one or more -``Q`` objects as positional (not-named) arguments. If multiple -``Q`` object arguments are provided to a lookup function, the arguments will be "AND"ed -together. For example:: - - Lang.nodes.filter( - Q(name__startswith='Py'), - Q(year=2005) | Q(year=2006) - ) - -This roughly translates to the following Cypher query:: - - MATCH (lang:Lang) WHERE name STARTS WITH 'Py' - AND (year = 2005 OR year = 2006) - return lang; - -Lookup functions can mix the use of ``Q`` objects and keyword arguments. All -arguments provided to a lookup function (be they keyword arguments or ``Q`` -objects) are "AND"ed together. However, if a ``Q`` object is provided, it must -precede the definition of any keyword arguments. For example:: - - Lang.nodes.get( - Q(year=2005) | Q(year=2006), - name__startswith='Py', - ) - -This would be a valid query, equivalent to the previous example; - -Has a relationship -================== - -The `has` method checks for existence of (one or more) relationships, in this case it returns a set of `Coffee` nodes which have a supplier:: - - Coffee.nodes.has(suppliers=True) - -This can be negated by setting `suppliers=False`, to find `Coffee` nodes without `suppliers`. 
- -Iteration, slicing and more -=========================== - -Iteration, slicing and counting is also supported:: - - # Iterable - for coffee in Coffee.nodes: - print coffee.name - - # Sliceable using python slice syntax - coffee = Coffee.nodes.filter(price__gt=2)[2:] - -The slice syntax returns a NodeSet object which can in turn be chained. - -Length and boolean methods dont return NodeSet objects and cannot be chained further:: - - # Count with __len__ - print len(Coffee.nodes.filter(price__gt=2)) - - if Coffee.nodes: - print "We have coffee nodes!" - -Filtering by relationship properties -==================================== - -Filtering on relationship properties is also possible using the `match` method. Note that again these relationships must have a definition.:: - - coffee_brand = Coffee.nodes.get(name="BestCoffeeEver") - - for supplier in coffee_brand.suppliers.match(since_lt=january): - print(supplier.name) - -Ordering by property -==================== - -Ordering results by a particular property is done via th `order_by` method:: - - # Ascending sort - for coffee in Coffee.nodes.order_by('price'): - print(coffee, coffee.price) - - # Descending sort - for supplier in Supplier.nodes.order_by('-delivery_cost'): - print(supplier, supplier.delivery_cost) - - -Removing the ordering from a previously defined query, is done by passing `None` to `order_by`:: - - # Sort in descending order - suppliers = Supplier.nodes.order_by('-delivery_cost') - - # Don't order; yield nodes in the order neo4j returns them - suppliers = suppliers.order_by(None) - -For random ordering simply pass '?' to the order_by method:: - - Coffee.nodes.order_by('?') - -Retrieving paths -================ - -You can retrieve a whole path of already instantiated objects corresponding to -the nodes and relationship classes with a single query. 
- -Suppose the following schema: - -:: - - class PersonLivesInCity(StructuredRel): - some_num = IntegerProperty(index=True, - default=12) - - class CountryOfOrigin(StructuredNode): - code = StringProperty(unique_index=True, - required=True) - - class CityOfResidence(StructuredNode): - name = StringProperty(required=True) - country = RelationshipTo(CountryOfOrigin, - 'FROM_COUNTRY') - - class PersonOfInterest(StructuredNode): - uid = UniqueIdProperty() - name = StringProperty(unique_index=True) - age = IntegerProperty(index=True, - default=0) - - country = RelationshipTo(CountryOfOrigin, - 'IS_FROM') - city = RelationshipTo(CityOfResidence, - 'LIVES_IN', - model=PersonLivesInCity) - -Then, paths can be retrieved with: - -:: - - q = db.cypher_query("MATCH p=(:CityOfResidence)<-[:LIVES_IN]-(:PersonOfInterest)-[:IS_FROM]->(:CountryOfOrigin) RETURN p LIMIT 1", - resolve_objects = True) - -Notice here that ``resolve_objects`` is set to ``True``. This results in ``q`` being a -list of ``result, result_name`` and ``q[0][0][0]`` being a ``NeomodelPath`` object. - -``NeomodelPath`` ``nodes, relationships`` attributes contain already instantiated objects of the -nodes and relationships in the query, *in order of appearance*. - -It would be particularly useful to note here that each object is read exactly once from -the database. Therefore, nodes will be instantiated to their neomodel node objects and -relationships to their relationship models *if such a model exists*. In other words, -relationships with data (such as ``PersonLivesInCity`` above) will be instantiated to their -respective objects or ``StrucuredRel`` otherwise. Relationships do not "reload" their -end-points (unless this is required). - -Async neomodel - Caveats -======================== - -Python does not support async dunder methods. This means that we had to implement some overrides for those. 
-See the example below:: - - # This will not work as it uses the synchronous __bool__ method - assert await Customer.nodes.filter(prop="value") - - # Do this instead - assert await Customer.nodes.filter(prop="value").check_bool() - assert await Customer.nodes.filter(prop="value").check_nonzero() - - # Note : no changes are needed for sync so this still works : - assert Customer.nodes.filter(prop="value") diff --git a/doc/source/traversal.rst b/doc/source/traversal.rst index 25931d3c..4cbb2fd4 100644 --- a/doc/source/traversal.rst +++ b/doc/source/traversal.rst @@ -1,3 +1,5 @@ +.. _Path traversal: + ============== Path traversal ============== @@ -29,7 +31,7 @@ The `traverse_relations` method allows you to filter on the existence of more co This will generate a Cypher MATCH clause that enforces the existence of at least one path like `Coffee<--Supplier-->Country`. -The `Country` nodes matched will be made available for the rest of the query, with the variable name `country`. Note that this aliasing is optional. See the section on Advanced query operations for examples of how to use this aliasing. (ADD LINK TO DOC) +The `Country` nodes matched will be made available for the rest of the query, with the variable name `country`. Note that this aliasing is optional. See :ref:`Advanced query operations` for examples of how to use this aliasing. .. note::