Skip to content

Commit

Permalink
added support for ES 7 (fixes #155) (#161)
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb authored Sep 4, 2019
1 parent d6f9ce1 commit 3283bca
Show file tree
Hide file tree
Showing 15 changed files with 377 additions and 125 deletions.
45 changes: 25 additions & 20 deletions .ci/seed_es_on_travis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,104 +17,109 @@ TEST_DATA_DIR=$(pwd)/test_data
LEGACY_MAPPING_FILE="${TEST_DATA_DIR}/legacy_shakespeare_mapping.json"
ES5_MAPPING_FILE="${TEST_DATA_DIR}/es5_shakespeare_mapping.json"
ES6_MAPPING_FILE="${TEST_DATA_DIR}/es6_shakespeare_mapping.json"
ES7_MAPPING_FILE="${TEST_DATA_DIR}/es7_shakespeare_mapping.json"

SAMPLE_DATA_FILE=${TEST_DATA_DIR}/sample.json
ES7_SAMPLE_DATA_FILE=${TEST_DATA_DIR}/sample_es7.json

case "$ES_VERSION" in
"") ;;

"1.0.0")
export ES_VERSION=1.0.0;
export MAPPING_FILE=${LEGACY_MAPPING_FILE};
export ES_BINARY_URL="${ES1_ARCHIVE}/elasticsearch-$ES_VERSION.deb"
;;

"1.4.4")
export ES_VERSION=1.4.4;
export MAPPING_FILE=${LEGACY_MAPPING_FILE};
export ES_BINARY_URL="${ES1_ARCHIVE}/elasticsearch-$ES_VERSION.deb"
;;

"1.7.2")
export ES_VERSION=1.7.2;
export MAPPING_FILE=${LEGACY_MAPPING_FILE};
export ES_BINARY_URL="${ES1_ARCHIVE}/elasticsearch-$ES_VERSION.deb"
;;

"2.0.2")
export ES_VERSION=2.0.2;
export MAPPING_FILE=${LEGACY_MAPPING_FILE};
export ES_BINARY_URL="${ES2_ARCHIVE}/$ES_VERSION/elasticsearch-$ES_VERSION.deb"
;;

"2.1.2")
export ES_VERSION=2.1.2;
export MAPPING_FILE=${LEGACY_MAPPING_FILE};
export ES_BINARY_URL="${ES2_ARCHIVE}/$ES_VERSION/elasticsearch-$ES_VERSION.deb"
;;

"2.2.2")
export ES_VERSION=2.2.2;
export MAPPING_FILE=${LEGACY_MAPPING_FILE};
export ES_BINARY_URL="${ES2_ARCHIVE}/$ES_VERSION/elasticsearch-$ES_VERSION.deb"
;;

"2.3.5")
export ES_VERSION=2.3.5;
export MAPPING_FILE=${LEGACY_MAPPING_FILE};
export ES_BINARY_URL="${ES2_ARCHIVE}/$ES_VERSION/elasticsearch-$ES_VERSION.deb"
;;

"5.0.2")
export ES_VERSION=5.0.2;
export MAPPING_FILE=${ES5_MAPPING_FILE};
export ES_BINARY_URL="${ES5PLUS_ARCHIVE}/elasticsearch-$ES_VERSION.deb"
;;

"5.3.3")
export ES_VERSION=5.3.3;
export MAPPING_FILE=${ES5_MAPPING_FILE};
export ES_BINARY_URL="${ES5PLUS_ARCHIVE}/elasticsearch-$ES_VERSION.deb"
;;

"5.4.3")
export ES_VERSION=5.4.3;
export MAPPING_FILE=${ES5_MAPPING_FILE};
export ES_BINARY_URL="${ES5PLUS_ARCHIVE}/elasticsearch-$ES_VERSION.deb"
;;

"5.6.9")
export ES_VERSION=5.6.9;
export MAPPING_FILE=${ES5_MAPPING_FILE};
export ES_BINARY_URL="${ES5PLUS_ARCHIVE}/elasticsearch-$ES_VERSION.deb"
;;

"6.0.1")
export ES_VERSION=6.0.1;
export MAPPING_FILE=${ES6_MAPPING_FILE};
export ES_BINARY_URL="${ES5PLUS_ARCHIVE}/elasticsearch-$ES_VERSION.deb"
;;

"6.1.4")
export ES_VERSION=6.1.4;
export MAPPING_FILE=${ES6_MAPPING_FILE};
export ES_BINARY_URL="${ES5PLUS_ARCHIVE}/elasticsearch-$ES_VERSION.deb"
;;

"6.2.4")
export ES_VERSION=6.2.4;
export MAPPING_FILE=${ES6_MAPPING_FILE};
export ES_BINARY_URL="${ES5PLUS_ARCHIVE}/elasticsearch-$ES_VERSION.deb"
;;

"7.3.1")
export MAPPING_FILE=${ES7_MAPPING_FILE};
export ES_BINARY_URL="${ES5PLUS_ARCHIVE}/elasticsearch-$ES_VERSION-amd64.deb"
# overwrite SAMPLE_DATA_FILE to use the ES7-compliant data
export SAMPLE_DATA_FILE="${ES7_SAMPLE_DATA_FILE}"
;;
esac

# pull the binary
curl -O ${ES_BINARY_URL}
curl ${ES_BINARY_URL} \
--output elasticsearch.deb

# start the service and wait a bit
sudo dpkg \
-i \
--force-confnew \
elasticsearch-$ES_VERSION.deb
elasticsearch.deb

# deal with permissions
# reference: https://discuss.elastic.co/t/permission-denied-starting-elasticsearch-7-0/179336
sudo chown -R \
elasticsearch:elasticsearch \
/etc/default/elasticsearch

sudo service elasticsearch start
sleep ${SLEEP_TIL_STARTUP_SECONDS}
sudo service elasticsearch status
Expand All @@ -135,7 +140,7 @@ curl -X PUT \
-H 'Content-Type:application/json' \
-d @shakespeare_mapping.json

mv test_data/sample.json sample.json
mv ${SAMPLE_DATA_FILE} sample.json

curl -X POST \
"${ES_HOST}/shakespeare/_bulk" \
Expand Down
2 changes: 1 addition & 1 deletion .ci/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export JAVA_APT_PKG="oracle-java8-set-default"
# install these testing packages we need
if [[ "$TASK" == "rpkg" ]];
then
Rscript -e "install.packages(c('devtools', 'knitr', 'testthat', 'rmarkdown'), repos = 'http://cran.rstudio.com')"
Rscript -e "install.packages(c('data.table', 'devtools', 'futile.logger', 'knitr', 'testthat', 'rmarkdown', 'uuid'), repos = 'http://cran.rstudio.com')"
cp test_data/* r-pkg/inst/testdata/
fi

Expand Down
12 changes: 12 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ script:
# - ES_VERSION=6.0.1
# - ES_VERSION=6.1.4
# - ES_VERSION=6.2.4
# - ES_VERSION=7.3.1
matrix:
include:
############
Expand Down Expand Up @@ -119,6 +120,12 @@ matrix:
env:
- ES_VERSION=6.2.4
- TASK=rpkg
- language: r
warnings_are_errors: true
cache: packages
env:
- ES_VERSION=7.3.1
- TASK=rpkg
after_success:
- .ci/report_to_covr.sh
#################
Expand Down Expand Up @@ -194,3 +201,8 @@ matrix:
env:
- ES_VERSION=6.2.4
- TASK=pypkg
- language: python
python: 3.5
env:
- ES_VERSION=7.3.1
- TASK=pypkg
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# uptasticsearch development version

## Features

### Added support for ES7.x
- [#161](https://github.com/uptake/uptasticsearch/pull/161) Added support for ES7.x. The biggest changes between that major version and 6.x were the removal of `_all` as a way to reference all indices, changing the response format of `hits.total` into an object like `{"hits": {"total": 50}}`, and restricting all indices to have a single type of document. More details can be found at https://www.elastic.co/guide/en/elasticsearch/reference/current/breaking-changes-7.0.html.

# uptasticsearch 0.3.1

## Bugfixes
Expand Down
2 changes: 1 addition & 1 deletion py-pkg/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import subprocess
import datetime

CURRENT_VERSION = "0.1.0"
CURRENT_VERSION = "0.2.0"

#########################

Expand Down
21 changes: 13 additions & 8 deletions py-pkg/tests/test_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,26 @@
from uptasticsearch.clients import Uptasticsearch2
from uptasticsearch.clients import Uptasticsearch5
from uptasticsearch.clients import Uptasticsearch6
from uptasticsearch.clients import Uptasticsearch7
from uptasticsearch.clients import uptasticsearch_factory

from uptasticsearch.fetch_all import es_search


class TestEsSearch(object):
"""
es_search should work an return a pandas DataFrame
es_search should work and return a pandas DataFrame
"""
host = "http://127.0.0.1:9200"

def test_rectangle(self):
assert isinstance(es_search(self.host,
"shakespeare",
query_body=json.dumps({}),
size=10000,
max_hits=10,
scroll="1m"),
pd.DataFrame)
result = es_search(
self.host,
"shakespeare",
query_body=json.dumps({}),
size=10000,
max_hits=10,
scroll="1m"
)
print(result)
assert isinstance(result, pd.DataFrame)
31 changes: 26 additions & 5 deletions py-pkg/uptasticsearch/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ def __init__(self, url, http_client=HttpClient()):
self.url = _format_es_url(url)
self.client = http_client

def _get_total_hits(self, response_json):
"""
Given a dictionary representing the content of a
respose to a ``POST /_search`` request, return the total
number of docs matching the query
"""
return response_json['hits']['total']

def search(self, body, index="", doc_type="", scroll_context_timer="1m", page_size=10000, max_hits=None):
"""Execute a Search Query on the Elasticsearch Cluster
Expand Down Expand Up @@ -60,7 +68,7 @@ def search(self, body, index="", doc_type="", scroll_context_timer="1m", page_si
headers={'Content-Type': 'application/json'})

page = response.json()
total_hits = page['hits']['total']
total_hits = self._get_total_hits(page)
total_hits = min(total_hits, max_hits) if max_hits is not None else total_hits

results = [d['_source'] for d in page['hits']['hits']]
Expand Down Expand Up @@ -118,13 +126,26 @@ def _make_scroll_request(self, scroll_context_timer, scroll_id):
"scroll_id": scroll_id}),
headers={'Content-Type': 'application/json'})

class Uptasticsearch7(Uptasticsearch6):

def _get_total_hits(self, response_json):
"""
Given a dictionary representing the content of a
respose to a ``POST /_search`` request, return the total
number of docs matching the query
"""
return response_json['hits']['total']['value']


def uptasticsearch_factory(url, retries=5, backoff_factor=0.1):
http_client = HttpClient(retries=retries, backoff_factor=backoff_factor)
es_url = _format_es_url(url)
cluster_version = http_client.get(es_url).json()['version']['number'].split('.')[0]

return {"1": Uptasticsearch1,
"2": Uptasticsearch2,
"5": Uptasticsearch5,
"6": Uptasticsearch6}[cluster_version](es_url, http_client)
return {
"1": Uptasticsearch1,
"2": Uptasticsearch2,
"5": Uptasticsearch5,
"6": Uptasticsearch6,
"7": Uptasticsearch7
}[cluster_version](es_url, http_client)
2 changes: 1 addition & 1 deletion r-pkg/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: uptasticsearch
Type: Package
Title: Get Data Frame Representations of 'Elasticsearch' Results
Version: 0.3.1
Version: 0.4.1
Authors@R: c(
person("James", "Lamb", email = "jaylamb20@gmail.com", role = c("aut", "cre")),
person("Nick", "Paras", email = "nick.paras@uptake.com", role = c("aut")),
Expand Down
8 changes: 7 additions & 1 deletion r-pkg/R/es_search.R
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,13 @@ es_search <- function(es_host

# Parse to JSON to get total number of documents matching the query
firstResult <- jsonlite::fromJSON(firstResultJSON, simplifyVector = FALSE)
hits_to_pull <- min(firstResult[["hits"]][["total"]], max_hits)

major_version <- .get_es_version(es_host)
if (as.integer(major_version) > 6){
hits_to_pull <- min(firstResult[["hits"]][["total"]][["value"]], max_hits)
} else {
hits_to_pull <- min(firstResult[["hits"]][["total"]], max_hits)
}

# If we got everything possible, just return here
hits_pulled <- length(firstResult[["hits"]][["hits"]])
Expand Down
Loading

0 comments on commit 3283bca

Please sign in to comment.