Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
eikek committed May 21, 2022
0 parents commit 8d526a1
Show file tree
Hide file tree
Showing 12 changed files with 1,139 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#! /bin/sh
# direnv configuration: loads the development environment defined by the
# flake in this directory whenever the directory is entered.

# reload when these files change
watch_file flake.nix
watch_file flake.lock

{
  # shell gc root dir
  mkdir -p "$(direnv_layout_dir)"
  # The inner command substitution is quoted as well, so a layout directory
  # containing whitespace is passed to --profile as a single argument.
  eval "$(nix print-dev-env --profile "$(direnv_layout_dir)/flake-profile")"
}
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.direnv/
target/
test/tmp/
result
674 changes: 674 additions & 0 deletions LICENSE.txt

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Docspell with audio files

This is a simple addon for [Docspell](https://docspell.org) to add
support for audio files. It uses external tools like ffmpeg and stt to
extract text from an audio file. It uses wkhtmltopdf to create a pdf
file.

The result is a preview + pdf and indexed text in the Docspell dms.

*Note: The accuracy of text extraction depends on the models used. For
simplicity, this addon uses pre-trained models. For better results, you
need to train your own models.*


## Prerequisites

This addon supports these runners: `nix-flake` and `trivial`. An
internet connection is required to download the model files.

It is recommended to install [nix](https://nixos.org) on the machine
running joex. This allows using the `nix-flake` runner, which can
build the addon with all dependencies automatically.

Otherwise, for the trivial runner, you need to install these tools
manually: curl, ffmpeg, [stt](https://github.com/coqui-ai/STT)
(v0.9.x) and wkhtmltopdf. The latter should already be available since
it is a requirement for joex itself.

Only the `x86_64` architecture is supported, because nixpkgs doesn't
provide `stt` for other architectures.


## Usage

Currently there is nothing to configure. Just install the addon and
add it to a run configuration.


## Testing

Install [direnv](https://direnv.net/) and [nix](https://nixos.org) and
allow the source root via `direnv allow`. This applies the `devShell`
settings from `flake.nix`. Then build the addon:

```
nix build
```

Now you can run it:

```
./result/bin/audio-files-addon
```

It will run on the test files provided in `test/` and put results in
`test/tmp`.

For quicker turnaround you can also run the source file itself. This
works, because `devShell` puts all required binaries in path.
29 changes: 29 additions & 0 deletions docspell-addon.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Addon descriptor for the audio-files addon.
meta:
name: "audio-files-addon"
version: "0.1.0"
description: |
This addon adds support for audio files. Audio files are processed
by a speech-to-text engine and a pdf is generated.
It doesn't expect any user arguments at the moment. It requires
internet access to download model files.
# Events for which this addon is registered.
# NOTE(review): the exact meaning of each trigger is defined by Docspell;
# confirm against its addon documentation.
triggers:
- final-process-item
- final-reprocess-item
- existing-item

# Runner selection: nix and trivial are enabled, docker is disabled.
runner:
nix:
enable: true

docker:
enable: false

# The trivial runner is pointed at the Scheme source file.
trivial:
enable: true
exec: src/addon.scm

# networking is enabled; the description above states that internet access
# is required to download model files.
# NOTE(review): collectOutput presumably makes Docspell read the addon's
# output; confirm against the addon documentation.
options:
networking: true
collectOutput: true
42 changes: 42 additions & 0 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 56 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Flake for the audio-files addon: exposes the addon as a package and an app,
# and provides a development shell for running src/addon.scm directly.
{
  description = "A docspell addon for basic audio file support";

  inputs = {
    utils.url = "github:numtide/flake-utils";

    # Nixpkgs / NixOS version to use.
    nixpkgs.url = "nixpkgs/nixos-21.11";
  };

  outputs = { self, nixpkgs, utils }:
    # Outputs are generated for x86_64-linux only.
    utils.lib.eachSystem ["x86_64-linux"] (system:
      let
        pkgs = import nixpkgs {
          inherit system;
          overlays = [

          ];
        };
        name = "audio-files-addon";
      in rec {
        # The addon itself, built by nix/addon.nix.
        packages.${name} = pkgs.callPackage ./nix/addon.nix {
          inherit name;
        };

        defaultPackage = packages.${name};

        apps.${name} = utils.lib.mkApp {
          inherit name;
          drv = packages.${name};
        };
        defaultApp = apps.${name};

        devShell = pkgs.mkShell {
          inputsFrom = builtins.attrValues self.packages.${system};
          # Tools the addon script invokes by bare name. curl is listed
          # explicitly: the package only interpolates it into strings, so
          # inputsFrom does not bring it into the shell.
          buildInputs =
            [ pkgs.guile
              pkgs.guile-json
              pkgs.curl
              pkgs.stt
              pkgs.wkhtmltopdf
              pkgs.ffmpeg
            ];

          # Environment for running the addon against the files in test/.
          # NOTE(review): these presumably mirror the variables Docspell
          # sets when it runs an addon; confirm against the addon docs.
          ADDON_DIR = self;
          TMPDIR = "/tmp";
          ITEM_DATA_JSON="test/item_data.json";
          ITEM_ORIGINAL_JSON="test/item_source.json";
          ITEM_ORIGINAL_DIR="test/original";
          TMP_DIR="test/tmp";
          CACHE_DIR="test/tmp";
          OUTPUT_DIR="test/tmp";
          GUILE_WARN_DEPRECATED="detailed";
        };
      }
    );
}
34 changes: 34 additions & 0 deletions nix/addon.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{ stdenv, bash, cacert, curl, stt, wkhtmltopdf, ffmpeg, guile, guile-json, lib, name }:

# Builds the addon: patches absolute tool paths into src/addon.scm, compiles
# it with guild and installs a bash wrapper that starts guile on the result.
stdenv.mkDerivation {
  inherit name;
  src = lib.sources.cleanSource ../.;

  buildInputs = [ guile guile-json ];

  # Replace the bare tool names bound to *curl*, *ffmpeg*, *stt* and
  # *wkhtmltopdf* in the script with absolute store paths, so the built
  # addon does not depend on PATH.
  patchPhase = ''
    TARGET=src/addon.scm
    sed -i 's,\*curl\* "curl",\*curl\* "${curl}/bin/curl",g' $TARGET
    sed -i 's,\*ffmpeg\* "ffmpeg",\*ffmpeg\* "${ffmpeg}/bin/ffmpeg",g' $TARGET
    sed -i 's,\*stt\* "stt",\*stt\* "${stt}/bin/stt",g' $TARGET
    sed -i 's,\*wkhtmltopdf\* "wkhtmltopdf",\*wkhtmltopdf\* "${wkhtmltopdf}/bin/wkhtmltopdf",g' $TARGET
  '';

  buildPhase = ''
    guild compile -o ${name}.go src/addon.scm
  '';


  # module name must be same as <filename>.go
  # The wrapper forwards its arguments as "$@" (quoted) so that arguments
  # containing whitespace or glob characters reach guile unchanged. The
  # backslash keeps the unquoted heredoc from expanding $@ at build time.
  installPhase = ''
    mkdir -p $out/{bin,lib}
    cp ${name}.go $out/lib/
    cat > $out/bin/${name} <<-EOF
    #!${bash}/bin/bash
    export SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt"
    exec -a "${name}" ${guile}/bin/guile -C ${guile-json}/share/guile/ccache -C $out/lib -e '(${name}) main' -c "" "\$@"
    EOF
    chmod +x $out/bin/${name}
  '';
}
Loading

0 comments on commit 8d526a1

Please sign in to comment.