Skip to content
This repository has been archived by the owner on Sep 19, 2024. It is now read-only.

Commit

Permalink
refactor and test validate cypher
Browse files Browse the repository at this point in the history
  • Loading branch information
dudizimber committed Jul 3, 2024
1 parent c9712b1 commit 8f43ac5
Show file tree
Hide file tree
Showing 11 changed files with 446 additions and 310 deletions.
9 changes: 9 additions & 0 deletions falkordb_gemini_kg/classes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .ontology import Ontology
from .source import Source
from .node import Node
from .edge import Edge

# Attach a no-op handler to this package's logger so that importing the
# library never configures logging output on its own; the host application
# decides where (and whether) these records are emitted.
import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
73 changes: 73 additions & 0 deletions falkordb_gemini_kg/classes/attribute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import json
from falkordb_gemini_kg.fixtures.regex import *
import logging

logger = logging.getLogger(__name__)


class _AttributeType:
STRING = "string"
NUMBER = "number"
BOOLEAN = "boolean"

@staticmethod
def fromString(txt: str):
if txt.isdigit():
return _AttributeType.NUMBER
elif txt.lower() in ["true", "false"]:
return _AttributeType.BOOLEAN
return _AttributeType.STRING


class Attribute:
    """Schema entry describing one attribute of an ontology node or edge.

    Attributes:
        name: Attribute name.
        type: One of the ``_AttributeType`` constants.
        unique: Whether the attribute is flagged unique ("!" in text form).
        required: Whether the attribute is flagged required ("*" in text form).
    """

    def __init__(
        self, name: str, attr_type: _AttributeType, unique: bool, required: bool = False
    ):
        self.name = name
        self.type = attr_type
        self.unique = unique
        self.required = required

    @staticmethod
    def _validate_type(attr_type):
        """Raise ``Exception`` unless ``attr_type`` is a known type constant."""
        if attr_type not in (
            _AttributeType.STRING,
            _AttributeType.NUMBER,
            _AttributeType.BOOLEAN,
        ):
            raise Exception(f"Invalid attribute type: {attr_type}")

    @staticmethod
    def from_json(txt: dict | str):
        """Build an ``Attribute`` from a dict or from a JSON-encoded string.

        The payload must provide "name", "type" and "unique"; "required" is
        optional and defaults to ``False``.

        Raises:
            Exception: If "type" is not one of the known attribute types.
            KeyError: If a mandatory key is missing.
        """
        txt = txt if isinstance(txt, dict) else json.loads(txt)
        Attribute._validate_type(txt["type"])
        return Attribute(
            txt["name"],
            txt["type"],
            txt["unique"],
            txt.get("required", False),
        )

    @staticmethod
    def from_string(txt: str):
        """Parse the textual form ``name: type[!][*]`` into an ``Attribute``.

        The type may be wrapped in quotes, which is the form ``__str__``
        produces (``name: "string!*"``); the quotes are discarded so that the
        output of ``str(attribute)`` can be parsed back.

        Raises:
            Exception: If the type is not one of the known attribute types.
            IndexError: If ``txt`` contains no ":" separator.
        """
        name = txt.split(":")[0].strip()
        attr_type = txt.split(":")[1].split("!")[0].split("*")[0].strip()
        # Drop the surrounding quote characters left over from a quoted type,
        # e.g. '"string' once the trailing '!"' has been split away.
        attr_type = attr_type.strip("\"'").strip()
        unique = "!" in txt
        required = "*" in txt

        Attribute._validate_type(attr_type)

        return Attribute(name, attr_type, unique, required)

    def to_json(self):
        """Return the attribute as a JSON-serialisable dict."""
        return {
            "name": self.name,
            "type": self.type,
            "unique": self.unique,
            "required": self.required,
        }

    def __str__(self) -> str:
        """Render as ``name: "type!*"`` ("!" = unique, "*" = required)."""
        return f"{self.name}: \"{self.type}{'!' if self.unique else ''}{'*' if self.required else ''}\""

124 changes: 124 additions & 0 deletions falkordb_gemini_kg/classes/edge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import json
import re
import logging
from .attribute import Attribute, _AttributeType
from falkordb import Node as GraphNode, Edge as GraphEdge
from falkordb_gemini_kg.fixtures.regex import *

logger = logging.getLogger(__name__)

class _EdgeNode:
def __init__(self, label: str):
self.label = label

@staticmethod
def from_json(txt: str):
txt = txt if isinstance(txt, dict) else json.loads(txt)
return _EdgeNode(txt["label"])

def to_json(self):
return {"label": self.label}

def __str__(self) -> str:
return f"(:{self.label})"


class Edge:
    """Ontology edge: a labelled relation between two node labels.

    Attributes:
        label: Relation label.
        source: ``_EdgeNode`` for the start of the relation.
        target: ``_EdgeNode`` for the end of the relation.
        attributes: ``Attribute`` definitions carried by the relation.
    """

    def __init__(
        self,
        label: str,
        source: _EdgeNode | str,
        target: _EdgeNode | str,
        attributes: list[Attribute],
    ):
        """Create an edge; plain-string endpoints are wrapped in ``_EdgeNode``.

        Raises:
            AssertionError: If an argument has an unexpected type.
        """
        if isinstance(source, str):
            source = _EdgeNode(source)
        if isinstance(target, str):
            target = _EdgeNode(target)

        assert isinstance(label, str), "Label must be a string"
        assert isinstance(source, _EdgeNode), "Source must be an EdgeNode"
        assert isinstance(target, _EdgeNode), "Target must be an EdgeNode"
        assert isinstance(attributes, list), "Attributes must be a list"

        self.label = label
        self.source = source
        self.target = target
        self.attributes = attributes

    @staticmethod
    def from_graph(edge: GraphEdge, nodes: list[GraphNode]):
        """Build an ``Edge`` from a graph edge and the nodes it may connect.

        The endpoints are looked up in ``nodes`` by id and the first label of
        each matching node is used. Every edge property becomes an
        ``Attribute``; "!" / "*" inside the property value mark it as unique /
        required.

        Raises:
            StopIteration: If no node in ``nodes`` matches an endpoint id.
        """
        logger.debug(f"Edge.from_graph: {edge}")
        return Edge(
            edge.relation,
            _EdgeNode(next(n.labels[0] for n in nodes if n.id == edge.src_node)),
            _EdgeNode(next(n.labels[0] for n in nodes if n.id == edge.dest_node)),
            [
                Attribute(
                    attr,
                    # Classify the individual property value, not the whole
                    # properties mapping (which has no str methods).
                    _AttributeType.fromString(edge.properties[attr]),
                    "!" in edge.properties[attr],
                    "*" in edge.properties[attr],
                )
                for attr in edge.properties
            ],
        )

    @staticmethod
    def from_json(txt: dict | str):
        """Build an ``Edge`` from a dict or a JSON-encoded string.

        "label", "source" and "target" are mandatory; a missing "attributes"
        key is treated as an empty attribute list.
        """
        txt = txt if isinstance(txt, dict) else json.loads(txt)
        return Edge(
            txt["label"],
            _EdgeNode.from_json(txt["source"]),
            _EdgeNode.from_json(txt["target"]),
            (
                [Attribute.from_json(attr) for attr in txt["attributes"]]
                if "attributes" in txt
                else []
            ),
        )

    @staticmethod
    def from_string(txt: str):
        """Parse an edge from its textual pattern using the fixture regexes.

        Raises:
            AttributeError: If one of the regexes does not match ``txt``.
        """
        label = re.search(EDGE_LABEL_REGEX, txt).group(0).strip()
        source = re.search(NODE_LABEL_REGEX, txt).group(0).strip()
        # NOTE(review): group(1) is a capture group of the FIRST match, not a
        # second match; confirm NODE_LABEL_REGEX is written to expose the
        # target label that way.
        target = re.search(NODE_LABEL_REGEX, txt).group(1).strip()
        edge = re.search(EDGE_REGEX, txt).group(0)
        raw_attributes = (
            edge.split("{")[1].split("}")[0].strip().split(",") if "{" in edge else []
        )

        return Edge(
            label,
            _EdgeNode(source),
            _EdgeNode(target),
            # An empty "{}" map splits into [""]; skip blank entries instead
            # of handing them to Attribute.from_string.
            [Attribute.from_string(attr) for attr in raw_attributes if attr.strip()],
        )

    def to_json(self):
        """Return the edge as a JSON-serialisable dict."""
        return {
            "label": self.label,
            "source": self.source.to_json(),
            "target": self.target.to_json(),
            "attributes": [attr.to_json() for attr in self.attributes],
        }

    def combine(self, edge2: "Edge"):
        """Add to ``self`` every attribute of ``edge2`` it does not have yet.

        Attributes are matched by name; attributes already present on ``self``
        are left untouched. Returns ``self``.

        Raises:
            Exception: If the two edges have different labels.
        """
        if self.label != edge2.label:
            raise Exception("Edges must have the same label to be combined")

        known_names = {a.name for a in self.attributes}
        for attr in edge2.attributes:
            if attr.name not in known_names:
                logger.debug(f"Adding attribute {attr.name} to edge {self.label}")
                self.attributes.append(attr)
                known_names.add(attr.name)

        return self

    def to_graph_query(self):
        """Return a query string that MERGEs this edge between its endpoint labels."""
        return f"MATCH (s:{self.source.label}) MATCH (t:{self.target.label}) MERGE (s)-[r:{self.label} {{{', '.join([str(attr) for attr in self.attributes])}}}]->(t) RETURN r"

    def __str__(self) -> str:
        """Render as ``(:Source)-[:LABEL {attributes}]->(:Target)``."""
        return f"{self.source}-[:{self.label} {{{', '.join([str(attr) for attr in self.attributes])}}}]->{self.target}"
64 changes: 64 additions & 0 deletions falkordb_gemini_kg/classes/node.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import json
import logging
from .attribute import Attribute, _AttributeType
from falkordb import Node as GraphNode

logger = logging.getLogger(__name__)

class Node:
    """Ontology node: a label together with its attribute definitions."""

    def __init__(self, label: str, attributes: list[Attribute]):
        self.label = label
        self.attributes = attributes

    @staticmethod
    def from_graph(node: GraphNode):
        """Build a ``Node`` from a graph node.

        The first label of ``node`` is used. Every property becomes an
        ``Attribute``; "!" / "*" inside the property value mark it as unique /
        required, mirroring the markers ``to_graph_query`` writes via
        ``str(attr)``.
        """
        logger.debug(f"Node.from_graph: {node}")
        return Node(
            node.labels[0],
            [
                Attribute(
                    attr,
                    _AttributeType.fromString(node.properties[attr]),
                    "!" in node.properties[attr],
                    # Read the required marker back as well so the flag is not
                    # lost when a node is loaded from the graph.
                    "*" in node.properties[attr],
                )
                for attr in node.properties
            ],
        )

    @staticmethod
    def from_json(txt: dict | str):
        """Build a ``Node`` from a dict or a JSON-encoded string.

        Spaces are removed from the label.

        Raises:
            KeyError: If "label" or "attributes" is missing.
        """
        txt = txt if isinstance(txt, dict) else json.loads(txt)
        return Node(
            txt["label"].replace(" ", ""),
            [Attribute.from_json(attr) for attr in txt["attributes"]],
        )

    def to_json(self):
        """Return the node as a JSON-serialisable dict."""
        return {
            "label": self.label,
            "attributes": [attr.to_json() for attr in self.attributes],
        }

    def combine(self, node2: "Node"):
        """Add to ``self`` every attribute of ``node2`` it does not have yet.

        Attributes are matched by name; attributes already present on ``self``
        are left untouched. Returns ``self``.

        Raises:
            Exception: If the two nodes have different labels.
        """
        if self.label != node2.label:
            raise Exception("Nodes must have the same label to be combined")

        known_names = {a.name for a in self.attributes}
        for attr in node2.attributes:
            if attr.name not in known_names:
                logger.debug(f"Adding attribute {attr.name} to node {self.label}")
                self.attributes.append(attr)
                known_names.add(attr.name)

        return self

    def get_unique_attributes(self):
        """Return the attributes flagged as unique."""
        return [attr for attr in self.attributes if attr.unique]

    def to_graph_query(self):
        """Return a query string that MERGEs this node with its attributes."""
        return f"MERGE (n:{self.label} {{{', '.join([str(attr) for attr in self.attributes])}}}) RETURN n"

    def __str__(self) -> str:
        """Render as ``(:Label {attributes})``."""
        return (
            f"(:{self.label} {{{', '.join([str(attr) for attr in self.attributes])}}})"
        )

Loading

0 comments on commit 8f43ac5

Please sign in to comment.