Skip to content

Commit

Permalink
Merge branch 'release/v2.1.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
CiaranOMara committed Feb 27, 2019
2 parents 6e9e6d3 + 076521a commit 3b15a66
Show file tree
Hide file tree
Showing 6 changed files with 228 additions and 85 deletions.
32 changes: 17 additions & 15 deletions Manifest.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# This file is machine-generated - editing it directly is not advised

[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"

[[Bedgraph]]
deps = ["Test"]
git-tree-sha1 = "815a4d7dad0145bb4560d77840d77b33e07861fe"
git-tree-sha1 = "039106453269e691fb54fb14e2b40f6c26091f62"
uuid = "0bcc2ff6-69eb-520d-bede-0374fc5bd2fd"
version = "1.0.3"
version = "1.1.0"

[[BinaryProvider]]
deps = ["Libdl", "Pkg", "SHA", "Test"]
Expand Down Expand Up @@ -64,7 +66,7 @@ deps = ["Mmap"]
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"

[[Distributed]]
deps = ["LinearAlgebra", "Random", "Serialization", "Sockets"]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"

[[FileIO]]
Expand All @@ -78,7 +80,7 @@ deps = ["Random"]
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"

[[InteractiveUtils]]
deps = ["LinearAlgebra", "Markdown"]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[[IterableTables]]
Expand Down Expand Up @@ -188,15 +190,15 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[[StatsBase]]
deps = ["DataStructures", "DelimitedFiles", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "Test"]
git-tree-sha1 = "7b596062316c7d846b67bf625d5963a832528598"
git-tree-sha1 = "6479f15167b434c765d3508524264bfd63ded7c8"
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
version = "0.27.0"
version = "0.28.0"

[[TableShowUtils]]
deps = ["DataValues", "Dates", "JSON", "Markdown", "Test"]
git-tree-sha1 = "b588b0bc89c107e6598b2bac967205932634d92d"
git-tree-sha1 = "14c54e1e96431fb87f0d2f5983f090f1b9d06457"
uuid = "5e66a065-1f0a-5976-b372-e0b8c017ca10"
version = "0.2.3"
version = "0.2.5"

[[TableTraits]]
deps = ["IteratorInterfaceExtensions", "Test"]
Expand All @@ -206,15 +208,15 @@ version = "0.4.1"

[[TableTraitsUtils]]
deps = ["DataValues", "IteratorInterfaceExtensions", "Missings", "TableTraits", "Test"]
git-tree-sha1 = "a355f1882d64881a11f853e64dcc353975c4df6e"
git-tree-sha1 = "55133a5476b61ec31060e555ffe12da27ac13682"
uuid = "382cd787-c1b6-5bf2-a167-d5b971a19bda"
version = "0.3.1"
version = "0.4.0"

[[Tables]]
deps = ["IteratorInterfaceExtensions", "LinearAlgebra", "Requires", "TableTraits", "Test"]
git-tree-sha1 = "37be2ed169d5771c1ac8d516d3bcb0093c49966e"
git-tree-sha1 = "5aa45584645393c1717e0cc1f0362c2ea81470a9"
uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
version = "0.1.15"
version = "0.1.17"

[[Test]]
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
Expand All @@ -227,14 +229,14 @@ uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
version = "0.8.1"

[[UUIDs]]
deps = ["Random"]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[[WeakRefStrings]]
deps = ["Missings", "Random", "Test"]
git-tree-sha1 = "bccf012a0e8815410ad8b6846944a33903992fc3"
git-tree-sha1 = "cf70c71939e621a3fac4156a8bfb3c80d745794a"
uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
version = "0.5.6"
version = "0.5.7"
8 changes: 6 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "BedgraphFiles"
uuid = "85eb9095-274b-55ce-be28-9e90f41ac741"
authors = ["Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
version = "2.0.2"
version = "2.1.0"

[deps]
Bedgraph = "0bcc2ff6-69eb-520d-bede-0374fc5bd2fd"
Expand All @@ -14,8 +14,12 @@ TableShowUtils = "5e66a065-1f0a-5976-b372-e0b8c017ca10"
TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
TableTraitsUtils = "382cd787-c1b6-5bf2-a167-d5b971a19bda"

[compat]
Bedgraph = "^1.1"

[extras]
Query = "1a8c2f83-1ff3-5112-b086-8aa67b057ba1"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
test = ["Test", "Query"]
72 changes: 53 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,43 @@ add("BedgraphFiles")

## Usage

### Load a Bedgraph file
### Loading a Bedgraph file

To read a Bedgraph file into a ``DataFrame``, use the following Julia code:
To load a Bedgraph file into a ``Vector{Bedgraph.Record}``, use the following Julia code:

````julia
using FileIO, BedgraphFiles, DataFrames
using FileIO, BedgraphFiles, Bedgraph

df = DataFrame(load("data.bedgraph"))
records = Vector{Bedgraph.Record}(load("data.bedgraph"))
````

The call to ``load`` returns a ``struct`` that is an [IterableTables.jl](https://github.com/davidanthoff/IterableTables.jl) source, so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTables.jl](https://github.com/davidanthoff/IterableTables.jl). Here are some examples of materialising a Bedgraph file into data structures that are not a ``DataFrame``:
### Saving a Bedgraph file

> **Note:** saving on top of an existing file will overwrite metadata/header information with a minimal working header.
The following example saves a ``Vector{Bedgraph.Record}`` to a Bedgraph file:
````julia
using FileIO, BedgraphFiles, Bedgraph

records = [Bedgraph.Record("chr", i, i + 99, rand()) for i in 1:100:1000]

save("output.bedgraph", records)
````

### IterableTables
The call to ``load`` returns a ``struct`` that is an [IterableTables.jl](https://github.com/davidanthoff/IterableTables.jl) source, so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTables.jl](https://github.com/davidanthoff/IterableTables.jl).

To load a Bedgraph file into a `DataFrame`, use the following Julia code:

```julia
using FileIO, BedgraphFiles, DataFrames

df = DataFrame(load("data.bedgraph"))
```

Here are some examples of materialising a Bedgraph file into data structures that are not a `DataFrame`:

```julia
using FileIO, BedgraphFiles, DataTables, IndexedTables, Gadfly

# Load into a DataTable
Expand All @@ -47,38 +71,48 @@ it = IndexedTable(load("data.bedgraph"))

# Plot directly with Gadfly
plot(load("data.bedgraph"), x=:first, y=:value, Geom.line)
````

### Save a Bedgraph file

> **Note:** saving on top of an existing file will overwrite metadata/header information with a minimal working header.
```

The following code saves any iterable table as a Bedgraph file:
````julia
```julia
using FileIO, BedgraphFiles

it = getiterator(data)

save("output.bedgraph", it)
````
This will work as long as ``it`` is any of the types supported as sources in [IterableTables.jl](https://github.com/davidanthoff/IterableTables.jl).
```
This will work as long as `it` is any of the types supported as sources in [IterableTables.jl](https://github.com/davidanthoff/IterableTables.jl).


### Using the pipe syntax

Both ``load`` and ``save`` also support the pipe syntax. For example, to load a Bedgraph file into a ``DataFrame``, one can use the following code:
Both `load` and `save` also support the pipe syntax. For example, to load a Bedgraph file into a `DataFrame`, one can use the following code:

````julia
```julia
using FileIO, BedgraphFiles, DataFrames

df = load("data.bedgraph") |> DataFrame
````
```

To save an iterable table, one can use the following form:

````julia
```julia
using FileIO, BedgraphFiles, DataFrames

df = # Acquire a DataFrame somehow

df |> save("output.bedgraph")
````
```

The `save` method returns the data it was given: a `Vector{Bedgraph.Record}` when records are saved directly, or the original iterable table otherwise. This is useful when periodically saving your work during a sequence of operations.

The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/davidanthoff/Query.jl) queries, for example one can easily load a Bedgraph file, pipe it into a query, then pipe it to the ``save`` function to store the results in a new file.
```julia
records = some sequence of operations |> save("output.bedgraph")
```

The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/davidanthoff/Query.jl) queries. For example, one can easily load a Bedgraph file, pipe it into a query, then pipe it to the `save` function to store the results in a new file.

```julia
using FileIO, BedgraphFiles, Query
load("data.bedgraph") |> @filter(_.chrom == "chr19") |> save("data-chr19.bedgraph")
```
2 changes: 1 addition & 1 deletion REQUIRE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
julia 0.7
Bedgraph 1.0.0
Bedgraph 1.1.0
DataFrames 0.9.0
FileIO 1.0.1
IterableTables 0.9.0
Expand Down
98 changes: 73 additions & 25 deletions src/BedgraphFiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,21 @@ struct BedgraphFile
keywords
end

# Build a `Bedgraph.Record` from the first four fields of a `DataFrameRow`.
# Fields are read by position rather than by name, so any column names are accepted.
function Base.convert(::Type{Bedgraph.Record}, row::DataFrameRow) :: Bedgraph.Record
    chrom, left, right, value = row[1], row[2], row[3], row[4]
    return Bedgraph.Record(chrom, left, right, value)
end

# Convert every row of `df` into a `Bedgraph.Record`, preserving row order.
# Each row is converted with `convert(Bedgraph.Record, row)`.
function Base.convert(::Type{Vector{Bedgraph.Record}}, df::DataFrame) :: Vector{Bedgraph.Record}
    return Bedgraph.Record[convert(Bedgraph.Record, row) for row in eachrow(df)]
end

# Print a `BedgraphFile` to `io` as a table, using its iterator and the
# label "bedGraph file".
function Base.show(io::IO, source::BedgraphFile)
    rows = getiterator(source)
    return TableShowUtils.printtable(io, rows, "bedGraph file")
end
Expand All @@ -29,27 +44,38 @@ end

# Trait declarations: a `BedgraphFile` and a plain vector of records are both
# declared iterable and declared to be iterable tables.
IteratorInterfaceExtensions.isiterable(::BedgraphFile) = true
TableTraits.isiterabletable(::BedgraphFile) = true
IteratorInterfaceExtensions.isiterable(::Vector{Bedgraph.Record}) = true # Note: Vector{Bedgraph.Record} is iterable by default.
TableTraits.isiterabletable(::Vector{Bedgraph.Record}) = true

function _loaddata(path)

# Read all records from the file at `path`.
# Parsing is delegated to `Bedgraph.readRecords`; the file is opened read-only
# through the function form of `open`.
function _loaddata(path) :: Vector{Bedgraph.Record}
    return open(Bedgraph.readRecords, path, "r")
end

# Expose a vector of records as a table iterator with the columns
# `chrom`, `first`, `last` and `value`, built through TableTraitsUtils.
function IteratorInterfaceExtensions.getiterator(records::Vector{Bedgraph.Record})
    colnames = Symbol[:chrom, :first, :last, :value]

    # One column per record field, in the same order as `colnames`.
    cols = [
        Bedgraph.chrom.(records),
        Bedgraph.first.(records),
        Bedgraph.last.(records),
        Bedgraph.value.(records)
    ]

    return TableTraitsUtils.create_tableiterator(cols, colnames)
end

# Load the records stored in `file.filename` and return an iterator over them.
# NOTE(review): this span appears to show removed and added diff lines together.
# As written, a DataFrame-backed iterator is built first and `it` is then
# reassigned to the iterator created directly from `records`, so only the
# latter is returned — confirm which lines should remain.
function IteratorInterfaceExtensions.getiterator(file::BedgraphFile)

records = _loaddata(file.filename)

# Pack records into DataFrame.
df = DataFrame(
chrom = Bedgraph.chrom.(records)::Vector{String},
first = first.(records)::Vector{Int},
last = last.(records)::Vector{Int},
value = Bedgraph.value.(records)
)

it = getiterator(df)
# Replaces the DataFrame-based iterator assigned on the previous line.
it = getiterator(records)

return it
end
Expand All @@ -58,36 +84,58 @@ function Base.collect(x::BedgraphFile)
return collect(getiterator(x))
end

function save(file::BedgraphFileFormat, header::Bedgraph.BedgraphHeader, records::Vector{Bedgraph.Record})
#TODO: consider formalising Records function in bedgraph (e.g. Bedgraph.Records, Bedgraph.Bedgraph.Records) that returns Vector{Bedgraph.Record}.
# Materialise the iterable table `x` into records.
# The table is split into columns (missing values represented as `missing`), and
# the first four columns are handed, by position, to the column-wise `convert`.
function _Records(x) :: Vector{Bedgraph.Record}
    columns, _ = create_columns_from_iterabletable(x, na_representation=:missing)

    return convert(Vector{Bedgraph.Record}, columns[1], columns[2], columns[3], columns[4])
end

# Build records from a vector of named tuples by treating it as an iterable
# table; the work is delegated to `_Records`.
function Vector{Bedgraph.Record}(x::AbstractVector{T}) :: Vector{Bedgraph.Record} where {T<:NamedTuple}
    @debug "Vector{Bedgraph.Record}(x::AbstractVector{T})"

    return _Records(x)
end

# Build records from a `BedgraphFile` by reading `file.filename` directly with
# `_loaddata`.
function Vector{Bedgraph.Record}(file::B) :: Vector{Bedgraph.Record} where {B<:BedgraphFile}
    @debug "Vector{Bedgraph.Record}(file::BedgraphFile)"

    return _loaddata(file.filename)
end

#TODO: consider formalising Records function in bedgraph (e.g. Bedgraph.Records, Bedgraph.Bedgraph.Records) that returns Vector{Bedgraph.Record}.
# Generic fallback constructor: iterable tables are materialised through
# `_Records`; anything else is passed to `convert(Vector{Bedgraph.Record}, x)`.
function Vector{Bedgraph.Record}(x::T) :: Vector{Bedgraph.Record} where {T}

    # Table-like input: take the column-based path.
    if TableTraits.isiterabletable(x)
        @debug "Vector{Bedgraph.Record}(x) - isiterabletable"
        return _Records(x)
    end

    # Everything else relies on an applicable `convert` method.
    @debug "Vector{Bedgraph.Record}(x) - converting"
    return convert(Vector{Bedgraph.Record}, x)
end

# Write `header` and `records` to `file.filename` through the three-argument `write`.
# The records are handed back unchanged, which is useful when piping
# (e.g., records = some_operation |> save(file)).
function save(file::BedgraphFileFormat, header::Bedgraph.BedgraphHeader, records::Vector{Bedgraph.Record}) :: Vector{Bedgraph.Record}
    write(file.filename, header, records)
    return records
end

function save(file::BedgraphFileFormat, records::Vector{Bedgraph.Record}; bump_forward = true)
# Sort `records`, generate a basic header for them and delegate to the
# `save` method that takes an explicit header; its result is returned.
# `sort!` works in place, so the caller's vector is reordered as a side effect.
# `bump_forward` is forwarded unchanged to `Bedgraph.generateBasicHeader`.
function save(file::BedgraphFileFormat, records::Vector{Bedgraph.Record}; bump_forward = true) :: Vector{Bedgraph.Record}

#TODO: bump_forward records.
sort!(records)

# NOTE(review): the header is generated twice with identical arguments; this looks
# like a removed and an added diff line shown together — confirm which should remain.
header = Bedgraph.generateBasicHeader(records, bump_forward = bump_forward)
header = Bedgraph.generateBasicHeader(records, bump_forward = bump_forward) #TODO: consolidate header generation and determine whether there is a need for bump_forward.

return save(file, header, records)
end

# Save arbitrary table-like `data` to a bedGraph file.
# NOTE(review): this span appears to show removed and added diff lines together
# (two ways of building `records`, two `save` calls, two `return` statements).
# As written, the first `return` ends the function, so `return data` is never
# reached — confirm which lines belong to the intended version.
function save(file::BedgraphFileFormat, data; bump_forward = true)
# Reject anything that is not an iterable table.
isiterabletable(data) || error("Can't write this data to bedGraph file.")

it = getiterator(data)

df = DataFrame(it)
# Build the records directly from the iterator.
records = Vector{Bedgraph.Record}(it)

# Pack DataFrame into a vector of type record.
# This reassigns `records` to an uninitialised vector, discarding the value built above.
records = Vector{Bedgraph.Record}(undef, length(it))
# Called while `records` still holds the uninitialised vector from the previous line.
save(file, records, bump_forward = bump_forward)

for (i, row) in enumerate(eachrow(df))
records[i] = Bedgraph.Record(row[1], row[2], row[3], row[4]) # Note: using index to allow flexible column names.
end

header = Bedgraph.generateBasicHeader(records, bump_forward = bump_forward)

return save(file, header, records)
return data #Note: this return is useful when piping.
end

end # module
Loading

0 comments on commit 3b15a66

Please sign in to comment.