Skip to content

Commit

Permalink
Merged concepts branch
Browse files Browse the repository at this point in the history
  • Loading branch information
kerrycobb committed Mar 22, 2024
2 parents 3b8de80 + c2c5a19 commit b2f3cee
Show file tree
Hide file tree
Showing 19 changed files with 862 additions and 1,072 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: BioSeq Tests CI
name: Phylogeni Tests CI
on:
push:
branches: [ "main" ]
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
# PhylogeNi
PhylogeNi is a Nim library with some basic functions for working with phylogenetic trees.

PhylogeNi is a work in progress. Suggestions, contributions, and criticisms are welcome! Breaking changes are likely.
PhylogeNi is a work in progress. Suggestions, criticisms, and especially contributions are welcome! Breaking changes are likely.

## Installation
You will need the Nim compiler to be installed on your system. See https://nim-lang.org/

It is recommended that BioSeq be installed with nimble.
It is recommended that PhylogeNi be installed with nimble.

`nimble install phylogeni`

Expand Down
130 changes: 18 additions & 112 deletions src/phylogeni.nim
Original file line number Diff line number Diff line change
@@ -1,113 +1,19 @@
import ./phylogeni/[
tree,
io/parseNewick,
io/writeNewick,
simulate]

export tree,
parseNewick,
writeNewick,
simulate

## =========
## PhylogeNi
## =========
##
## PhylogeNi is a Nim library for working with phylogenetic trees.
##

runnableExamples:
  # Build a tree from a Newick string and print it.
  var t = treeFromString("(B:1.0,C:1.0)A:1.0;")

  echo t

  # -A /-B
  #    \-C

  # Attach two new children, D and E, to the node labelled "C".
  for i in t.preorder():
    if i.label == "C":
      i.addChild(newNode("D", 1.0))
      i.addChild(newNode("E", 1.0))
  # Reorder the tree with `ladderize`, then print it again.
  t.ladderize(Ascending)
  echo t

  #    /C /-D
  # -A|   \-E
  #    \-B

  # Serialise the modified tree back to a Newick string.
  var str = t.writeNewickString()
  echo str
  # [&U]((D:1.0,E:1.0)C:1.0,B:1.0)A:1.0;

##
## See the module docs for more details:
## `tree<./phylogeni/tree.html>`_
## Provides basic functions for working with `Tree` and `Node` types such as:
## - Tree and Node creation
## - Topology modification
## - Tree iteration
##
## `parseNewick<./phylogeni/io/parseNewick.html>`_
## Provides functions for reading trees from files or strings.
##
## `writeNewick<./phylogeni/io/writeNewick.html>`_
## Provides functions for writing trees to files or strings.
##
## `simulate<./phylogeni/simulate.html>`_
## Provides functions for simulating trees:
## - Pure birth model
## - Birth death model
##
## Generic Node Data
## =================
## `Node` is a generic type which can have any object stored in the data field.
##
## One great feature of PhylogeNi is that you do not need to completely rewrite your
## own parser/writer for custom data types when reading and writing a newick file or string.
## You only need to create `parseAnnotation` and `writeAnnotation` procs to handle
## reading or writing the annotation string.

runnableExamples:
  import std/strutils
  import std/strformat

  type
    CustomData = object
      posterior: float
      credibleInterval: tuple[lower, upper: float]

  let treeStr = "(B:1.0[&p:0.95,ci:0.9-1.0],C:1.0[&p:0.95,ci:0.9-1.0])A:1.0[&p:0.95,ci:0.9-1.0];"

  proc parseAnnotation(p: var NewickParser[CustomData], annotation: string) =
    ## Fills the current node's `CustomData` from an annotation of the form
    ## `p:<float>,ci:<float>-<float>`.
    ## Raises `NewickError` when a field is malformed, unknown, or missing.
    # Track which of the two required fields have been seen.
    var dataCheck = (p: false, ci: false)
    for field in annotation.split(","):
      let keyValue = field.split(":")
      # Malformed input is a parse error, not a programmer bug, so raise a
      # catchable NewickError instead of asserting.
      if keyValue.len != 2:
        raise newException(NewickError, "Invalid Annotation")
      case keyValue[0]
      of "p":
        p.currNode.data.posterior = parseFloat(keyValue[1])
        dataCheck.p = true
      of "ci":
        let bounds = keyValue[1].split("-")
        if bounds.len != 2:
          raise newException(NewickError, "Invalid Annotation")
        p.currNode.data.credibleInterval = (parseFloat(bounds[0]), parseFloat(bounds[1]))
        dataCheck.ci = true
      else:
        raise newException(NewickError, "Invalid Annotation")
    if not dataCheck.p or not dataCheck.ci:
      raise newException(NewickError, "Annotation must contain both 'p' and 'ci' fields")

  proc writeAnnotation(node: Node[CustomData], str: var string) =
    ## Appends the node's data to `str` in the same format `parseAnnotation` reads.
    str.add(fmt"[&p:{$node.data.posterior},ci:{$node.data.credibleInterval.lower}-{$node.data.credibleInterval.upper}]")

  let
    t = treeFromString(treeStr, CustomData)
    str = t.writeNewickString()
  echo str
  # [&U](B:1.0[&p:0.95,ci:0.9-1.0],C:1.0[&p:0.95,ci:0.9-1.0])A:1.0[&p:0.95,ci:0.9-1.0];





concepts,
# coordinates,
manipulate,
newickParser,
newickWriter,
nexusParser,
nodeTypes,
traverse]

export
concepts,
# coordinates,
manipulate,
newickParser,
newickWriter,
nexusParser,
nodeTypes,
traverse
140 changes: 140 additions & 0 deletions src/phylogeni/concepts.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import std/[strutils, sequtils]

type
  # Catchable error raised by tree operations in this module that cannot
  # produce a result (e.g. `mrca` when no ancestor is shared).
  TreeError* = object of CatchableError

type
  # Concept matched by any node type `T` whose `parent` is a `T` and whose
  # `children` can be iterated to yield `T` values.
  TraversableNode* = concept n, type T
    n.parent is T
    for i in n.children:
      i is T

func isLeaf*(node: TraversableNode): bool =
  ## Returns `true` when `node` has no children.
  node.children.len == 0

func isRoot*(node: TraversableNode): bool =
  ## Returns `true` when `node` has no parent, i.e. its `parent` is nil.
  node.parent.isNil

proc mrca*(a, b: TraversableNode): TraversableNode =
  ## Returns the first node yielded by `a.iterAncestors` that is also yielded
  ## by `b.iterAncestors` (the most recent common ancestor).
  ## Raises `TreeError` when the two ancestor sequences have no node in common.
  for ancestorOfA in a.iterAncestors:
    for ancestorOfB in b.iterAncestors:
      if ancestorOfA == ancestorOfB:
        return ancestorOfA
  raise newException(TreeError, "No MRCA shared by nodes")


###############################
# Labeled Node
type
  # Concept for a `TraversableNode` that also carries a string `label`.
  LabeledNode* = concept n
    n is TraversableNode
    n.label is string

func find*(tree: LabeledNode, str: string): LabeledNode =
  ## Walks `tree` in preorder and returns the first node whose label equals `str`.
  ## When no label matches, `result` is never assigned and keeps its default value.
  for candidate in tree.preorder:
    if candidate.label != str:
      continue
    return candidate

func `$`*(node: LabeledNode): string =
  ## The string form of a labeled node is its label.
  result = node.label

func get_ascii(node: LabeledNode, char1="-", showInternal=true): tuple[clines: seq[string], mid:int] =
  ## Builds the ascii drawing of the subtree rooted at `node`.
  ## `char1` is the connector character drawn at the start of this node's branch.
  ## Returns the drawing lines together with the index of the line on which
  ## this node's own branch sits.
  let hasChildren = node.children.len > 0
  # Branch width: at least 3 columns, widened to the label when the label is drawn.
  var width = 3
  if (not hasChildren or showInternal) and node.label.len > width:
    width = node.label.len
  # A leaf is a single line: connector, dash, label.
  if not hasChildren:
    return (@[char1 & "-" & node.label], 0)
  let
    blank = strutils.repeat(' ', width)
    bar = strutils.repeat(' ', width-1) & "|"
  var
    attachRows: seq[int]  # row of each child's branch within `body`
    body: seq[string]     # drawings of all children, stacked
  for child in node.children:
    # Connector depends on the child's position: only child, first, last, or middle.
    let connector =
      if node.children.len == 1: "-"
      elif child == node.children[0]: "/"
      elif child == node.children[^1]: "\\"
      else: "-"
    let (childLines, childMid) = get_ascii(child, connector, showInternal)
    attachRows.add(childMid + body.len)
    body.add(childLines)
  let
    lo = attachRows[0]
    hi = attachRows[^1]
    mid = (lo + hi) div 2
  # One prefix per body line: blank above the first child branch, a vertical
  # bar between first and last child branches, blank below.
  var prefixes: seq[string]
  prefixes.add(sequtils.repeat(blank, lo+1))
  if attachRows.len > 1:
    prefixes.add(sequtils.repeat(bar, hi-lo-1))
  prefixes.add(sequtils.repeat(blank, body.len-hi))
  # The middle row carries this node's own horizontal branch.
  prefixes[mid] = char1 & strutils.repeat("-", width-2) & prefixes[mid][^1]
  var lines: seq[string]
  for (prefix, line) in zip(prefixes, body):
    lines.add(prefix & line)
  if showInternal:
    # Overwrite part of the branch with the internal node's label.
    let stem = lines[mid]
    lines[mid] = stem[0] & node.label & stem[node.label.len+1..^1]
  return (lines, mid)

func ascii*(node: LabeledNode, char1="-", showInternal=true): string =
  ## Returns the ascii drawing of the tree rooted at `node` as one
  ## newline-separated string.
  let drawing = get_ascii(node, char1, showInternal)
  drawing.clines.join("\n")


###############################
# Length Node
type
  # Concept for a `TraversableNode` that also has a numeric `length`.
  LengthNode* = concept n
    n is TraversableNode
    n.length is SomeNumber

func calcTreeLength*(node: LengthNode): float =
  ## Calculate total length of tree.
  ##
  ## Sums `length` over every node visited by `preorder` starting from each
  ## child of `node`; the `length` of `node` itself is not added.
  result = 0.0
  for child in node.children:
    for descendant in child.preorder():
      # `LengthNode` accepts any `SomeNumber` length; convert explicitly so
      # that non-`float` lengths (e.g. `int`) compile as well.
      result += float(descendant.length)

func treeHeight*(node: LengthNode): float =
  ## Calculate the height of subtree.
  ##
  ## The height is the largest height among the children of `node` (0.0 when
  ## there are none) plus the `length` of `node` itself.
  var maxHeight = 0.0
  for child in node.children:
    maxHeight = max(maxHeight, treeHeight(child))
  # `LengthNode` accepts any `SomeNumber` length; convert explicitly so that
  # non-`float` lengths (e.g. `int`) compile as well.
  result = maxHeight + float(node.length)


###############################
# Data readable from Newick string
type
  # Concept for a `TraversableNode` on which `parseNewickData` can be called
  # with a `string` argument.
  ReadableDataNode* = concept n
    n is TraversableNode
    n.parseNewickData(string)


###############################
# Data writable to Newick string
type
  # Concept for a `TraversableNode` whose `writeNewickData` yields a `string`.
  WritableDataNode* = concept n
    n is TraversableNode
    n.writeNewickData is string
61 changes: 61 additions & 0 deletions src/phylogeni/coordinates.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import ./concepts, ./traverse

type
  # Tree node that wraps a value of type `T` and adds x/y coordinates.
  # Fields are not exported; the accessor procs below expose them read-only.
  CoordNode*[T] = ref object
    parent: CoordNode[T]
    children: seq[CoordNode[T]]
    x: float # Horizontal position of node, equivalent to node height
    y: float # Vertical position of node
    node: T # The wrapped value

proc parent*[T](n: CoordNode[T]): CoordNode[T] =
  ## Read-only accessor for the `parent` field of `n`.
  result = n.parent

proc children*[T](n: CoordNode[T]): seq[CoordNode[T]] =
  ## Read-only accessor for the `children` field of `n`.
  result = n.children

proc x*[T](n: CoordNode[T]): float =
  ## Read-only accessor for the horizontal position of `n`.
  result = n.x

proc y*[T](n: CoordNode[T]): float =
  ## Read-only accessor for the vertical position of `n`.
  result = n.y

proc node*[T](n: CoordNode[T]): T =
  ## Read-only accessor for the value wrapped by `n`.
  result = n.node

proc newCoordNode[T: TraversableNode](node: T): CoordNode[T] =
  ## Creates a `CoordNode` wrapping a copy of `node`: a fresh `T` is allocated
  ## and the object `node` refers to is assigned into it. All other fields of
  ## the new `CoordNode` keep their default values.
  let copied = new(T)
  copied[] = node[]
  CoordNode[T](node: copied)

proc addChild[T: TraversableNode](parent, child: CoordNode[T]) =
  ## Links `child` under `parent`, and mirrors the same parent/child link on
  ## the nodes they wrap.
  parent.children.add(child)
  child.parent = parent
  parent.node.children.add(child.node)
  # The wrapped child's parent must be the wrapped parent itself; assigning
  # `parent.node.parent` would attach it to its grandparent instead.
  child.node.parent = parent.node
  # parent.node.addChild(child.node) # TODO: Use this when the proc for TraversableNode concept works

# proc getCoords*[T: LengthNode](root: T, branchLengthScaling=1.0, branchSep=1.0): CoordNode[T] =
# ## Return coordinates for a typical rectangular or slanted phylogeny
# assert branchLengthScaling > 0
# assert branchSep > 0
# var
# leafY = 0.0
# currNode = CoordNode[T](node: new(T)) # Placeholder, is parent to root node of new tree
# for i in root.newickorder:
# case i.state
# of ascendingTree:
# var newNode = newCoordNode(i.node)
# currNode.addChild(newNode)
# newNode.x = currNode.x + (i.node.length * branchLengthScaling)
# if i.node.isLeaf:
# newNode.y = leafY
# leafY += branchSep
# else:
# currNode = newNode
# of descendingTree:
# let
# lo = currNode.children[0].y
# up = currNode.children[^1].y
# currNode.y = (up - lo) / 2 + lo
# currNode = currNode.parent
# result = currNode.children[0]
Loading

0 comments on commit b2f3cee

Please sign in to comment.