From 1e643c58b58f93a62eb8f5b79e3b92e68b0f62f1 Mon Sep 17 00:00:00 2001 From: dl239 Date: Thu, 13 Jan 2022 02:22:25 -0600 Subject: [PATCH] feat: update demo (#1031) --- demo/README.md | 115 +++++++++++++++--- demo/predict-taxi-trip-duration-nb/Dockerfile | 3 +- .../script/import.py | 67 ---------- .../script/init.sh | 19 +-- .../script/predict_server.py | 26 ++-- .../script/predict_server_s.py | 104 ---------------- .../script/start_predict_server.sh | 6 +- .../script/train.py | 37 ++++-- .../script/train_s.py | 63 ---------- .../{standalone => quick_start}/data/data.csv | 0 docs/cn/standalone.md | 4 +- docs/en/standalone.md | 4 +- release/bin/start.sh | 6 +- 13 files changed, 148 insertions(+), 306 deletions(-) delete mode 100644 demo/predict-taxi-trip-duration-nb/script/import.py delete mode 100644 demo/predict-taxi-trip-duration-nb/script/predict_server_s.py delete mode 100644 demo/predict-taxi-trip-duration-nb/script/train_s.py rename demo/{standalone => quick_start}/data/data.csv (100%) diff --git a/demo/README.md b/demo/README.md index f7bc436b307..f572dd80e0d 100644 --- a/demo/README.md +++ b/demo/README.md @@ -24,25 +24,101 @@ w2 as (PARTITION BY passenger_count ORDER BY pickup_datetime ROWS_RANGE BETWEEN ``` ## 2. Demo with The Cluster Mode - > :warning: Required docker engine version >= 18.03 +**Start docker** +``` +docker run -it 4pdosc/openmldb:0.4.0 bash +``` +**Initialize environment** ```bash -# Pull the docker and start it -docker run -it 4pdosc/openmldb:0.3.2 bash - -# Initilize the environment ./init.sh +``` +**Create table** +```bash +# Start the OpenMLDB CLI for the cluster mode +../openmldb/bin/openmldb --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client +``` +```sql +# The below commands are executed in the CLI +> CREATE DATABASE demo_db; +> USE demo_db; +> CREATE TABLE t1(id string, vendor_id int, pickup_datetime timestamp, dropoff_datetime timestamp, passenger_count int, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, store_and_fwd_flag string, trip_duration int); +``` -# Run feature extraction and model training. Feature extraction will read offline data from the local file -python3 train.py ./fe.sql /tmp/model.txt - -# Import the data to online database -python3 import.py +**Import offline data to OpenMLDB** +```sql +# The below commands are executed in the CLI +> USE demo_db; +> SET @@execute_mode='offline'; +> LOAD DATA INFILE '/work/taxi-trip/data/taxi_tour_table_train_simple.snappy.parquet' INTO TABLE t1 options(format='parquet', header=true, mode='append'); +# You can see job status by the below command +> show jobs; +``` +**Run offline feature extraction** +```sql +# The below commands are executed in the CLI +> USE demo_db; +> SET @@execute_mode='offline'; +> SELECT trip_duration, passenger_count, +sum(pickup_latitude) OVER w AS vendor_sum_pl, +max(pickup_latitude) OVER w AS vendor_max_pl, +min(pickup_latitude) OVER w AS vendor_min_pl, +avg(pickup_latitude) OVER w AS vendor_avg_pl, +sum(pickup_latitude) OVER w2 AS pc_sum_pl, +max(pickup_latitude) OVER w2 AS pc_max_pl, +min(pickup_latitude) OVER w2 AS pc_min_pl, +avg(pickup_latitude) OVER w2 AS pc_avg_pl, +count(vendor_id) OVER w2 AS pc_cnt, +count(vendor_id) OVER w AS vendor_cnt +FROM t1 +WINDOW w AS (PARTITION BY vendor_id ORDER BY pickup_datetime ROWS_RANGE BETWEEN 1d PRECEDING AND CURRENT ROW), +w2 AS (PARTITION BY passenger_count ORDER BY pickup_datetime ROWS_RANGE BETWEEN 1d PRECEDING AND CURRENT ROW) INTO OUTFILE '/tmp/feature_data'; +``` +**Train model** +```bash +python3 train.py /tmp/feature_data /tmp/model.txt +``` +**Online SQL deployment** +```sql +# The below commands are executed in the CLI +> USE demo_db; +> SET @@execute_mode='online'; +> DEPLOY demo SELECT trip_duration, passenger_count, +sum(pickup_latitude) OVER w AS vendor_sum_pl, +max(pickup_latitude) OVER w AS vendor_max_pl, +min(pickup_latitude) OVER w AS vendor_min_pl, +avg(pickup_latitude) OVER w AS vendor_avg_pl, +sum(pickup_latitude) OVER w2 AS pc_sum_pl, +max(pickup_latitude) OVER w2 AS pc_max_pl, +min(pickup_latitude) OVER w2 AS pc_min_pl, +avg(pickup_latitude) OVER w2 AS pc_avg_pl, +count(vendor_id) OVER w2 AS pc_cnt, +count(vendor_id) OVER w AS vendor_cnt +FROM t1 +WINDOW w AS (PARTITION BY vendor_id ORDER BY pickup_datetime ROWS_RANGE BETWEEN 1d PRECEDING AND CURRENT ROW), +w2 AS (PARTITION BY passenger_count ORDER BY pickup_datetime ROWS_RANGE BETWEEN 1d PRECEDING AND CURRENT ROW); +``` +:bulb: Note that: -# Start the HTTP service for inference with OpenMLDB -./start_predict_server.sh ./fe.sql /tmp/model.txt +- The SQL used for the online deployment should be the same as that for offline feature extraction. +- Do not insert or import online data into the reference tables before deploy +**Import online data to OpenMLDB** +```sql +# The below commands are executed in the CLI +> USE demo_db; +> SET @@execute_mode='online'; +> LOAD DATA INFILE 'file:///work/taxi-trip/data/taxi_tour_table_train_simple.csv' INTO TABLE t1 options(format='csv', header=true, mode='append'); +# You can see job status by the below command +> show jobs; +``` +**Start HTTP service for inference with OpenMLDB** +```bash +./start_predict_server.sh 127.0.0.1:9080 /tmp/model.txt +``` +**Run inference with HTTP request** +```bash # Run inference with a HTTP request python3 predict.py # The following output is expected (the numbers might be slightly different) @@ -52,7 +128,7 @@ python3 predict.py ---------------predict trip_duration ------------- 848.014745715936 s ``` -:bulb: To read more details about the cluster mode, please refer to the [QuickStart (Cluster Mode)](https://github.com/4paradigm/OpenMLDB/blob/main/docs/en/cluster.md) +:bulb: To read more details about the cluster mode, please refer to the [QuickStart (Cluster Mode)](https://docs.openmldb.ai/content-1/openmldb_quickstart) ## 3. Demo with The Standalone Mode @@ -61,14 +137,14 @@ python3 predict.py **Start docker** ```bash -docker run -it 4pdosc/openmldb:0.3.2 bash +docker run -it 4pdosc/openmldb:0.4.0 bash ``` **Initialize environment** ```bash ./init.sh standalone ``` -**Create table and import the data to OpenMLDB.** +**Create table and import the data to OpenMLDB** ```bash # Start the OpenMLDB CLI for the standalone mode @@ -78,14 +154,13 @@ docker run -it 4pdosc/openmldb:0.3.2 bash # The below commands are executed in the CLI > CREATE DATABASE demo_db; > USE demo_db; -> CREATE TABLE t1(id string, vendor_id int, pickup_datetime timestamp, dropoff_datetime timestamp, passenger_count int, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, store_and_fwd_flag string, trip_duration int, INDEX(ts=pickup_datetime)); +> CREATE TABLE t1(id string, vendor_id int, pickup_datetime timestamp, dropoff_datetime timestamp, passenger_count int, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, store_and_fwd_flag string, trip_duration int); > LOAD DATA INFILE './data/taxi_tour.csv' INTO TABLE t1; ``` **Run offline feature extraction** ```sql # The below commands are executed in the CLI -> SET PERFORMANCE_SENSITIVE = false; > SELECT trip_duration, passenger_count, sum(pickup_latitude) OVER w AS vendor_sum_pl, max(pickup_latitude) OVER w AS vendor_max_pl, @@ -105,7 +180,7 @@ w2 AS (PARTITION BY passenger_count ORDER BY pickup_datetime ROWS_RANGE BETWEEN **Train model** ```bash -python3 train_s.py /tmp/feature.csv /tmp/model.txt +python3 train.py /tmp/feature.csv /tmp/model.txt ``` **Online SQL deployment** @@ -140,7 +215,7 @@ w2 AS (PARTITION BY passenger_count ORDER BY pickup_datetime ROWS_RANGE BETWEEN **Start HTTP service for inference with OpenMLDB** ``` -./start_predict_server.sh /tmp/model.txt +./start_predict_server.sh 127.0.0.1:8080 /tmp/model.txt ``` **Run inference with HTTP request** @@ -155,5 +230,5 @@ python3 predict.py 880.3688347542294 s ``` -:bulb: To read more details about the standalone mode, please refer to the [QuickStart (Standalone Mode)](https://github.com/4paradigm/OpenMLDB/blob/main/docs/en/standalone.md) +:bulb: To read more details about the standalone mode, please refer to the [QuickStart (Standalone Mode)](https://docs.openmldb.ai/content-1/openmldb_quickstart) diff --git a/demo/predict-taxi-trip-duration-nb/Dockerfile b/demo/predict-taxi-trip-duration-nb/Dockerfile index ef221ada3bd..522639da221 100644 --- a/demo/predict-taxi-trip-duration-nb/Dockerfile +++ b/demo/predict-taxi-trip-duration-nb/Dockerfile @@ -3,13 +3,14 @@ FROM openjdk:11.0.13-jre-slim-bullseye LABEL org.opencontainers.image.source https://github.com/4paradigm/OpenMLDB RUN apt-get update \ - && apt-get install -y --no-install-recommends libgomp1 curl binutils procps python3 python3-pip python3-numpy \ + && apt-get install -y --no-install-recommends libgomp1 curl binutils procps python3 python3-pip python3-numpy vim \ && rm -rf /var/lib/apt/lists/* \ && pip install --no-cache-dir py4j==0.10.9 numpy lightgbm tornado requests pandas openmldb COPY script /work/taxi-trip/ ENV LANG=en_US.UTF-8 +ENV SPARK_HOME=/work/openmldb/spark-3.0.0-bin-openmldbspark ARG OPENMLDB_VERSION=0.3.0 COPY setup_openmldb.sh / diff --git a/demo/predict-taxi-trip-duration-nb/script/import.py b/demo/predict-taxi-trip-duration-nb/script/import.py deleted file mode 100644 index e88a4081a03..00000000000 --- a/demo/predict-taxi-trip-duration-nb/script/import.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2021 4Paradigm -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -""" -import sqlalchemy as db - - -import sys -import datetime - -ddl=""" -create table t1( -id string, -vendor_id int, -pickup_datetime timestamp, -dropoff_datetime timestamp, -passenger_count int, -pickup_longitude double, -pickup_latitude double, -dropoff_longitude double, -dropoff_latitude double, -store_and_fwd_flag string, -trip_duration int, -index(key=vendor_id, ts=pickup_datetime), -index(key=passenger_count, ts=pickup_datetime) -); -""" -engine = db.create_engine('openmldb:///db_test?zk=127.0.0.1:2181&zkPath=/openmldb') -connection = engine.connect() -try: - connection.execute("create database db_test;"); -except Exception as e: - print(e) -try: - connection.execute(ddl); -except Exception as e: - print(e) - -def insert_row(line): - row = line.split(',') - row[2] = '%dl'%int(datetime.datetime.strptime(row[2], '%Y-%m-%d %H:%M:%S').timestamp() * 1000) - row[3] = '%dl'%int(datetime.datetime.strptime(row[3], '%Y-%m-%d %H:%M:%S').timestamp() * 1000) - insert = "insert into t1 values('%s', %s, %s, %s, %s, %s, %s, %s, %s, '%s', %s);"% tuple(row) - connection.execute(insert) - -with open('data/taxi_tour_table_train_simple.csv', 'r') as fd: - idx = 0 - for line in fd: - if idx == 0: - idx = idx + 1 - continue - insert_row(line.replace('\n', '')) - idx = idx + 1 diff --git a/demo/predict-taxi-trip-duration-nb/script/init.sh b/demo/predict-taxi-trip-duration-nb/script/init.sh index 5f94d1ca6f4..45d4df52827 100755 --- a/demo/predict-taxi-trip-duration-nb/script/init.sh +++ b/demo/predict-taxi-trip-duration-nb/script/init.sh @@ -20,29 +20,16 @@ MODE="cluster" if [ $# -gt 0 ]; then MODE=$1 fi -pkill mon pkill python3 rm -rf /tmp/* +cd /work/openmldb && rm -rf logs* && rm -rf db* sleep 2 if [[ "$MODE" = "standalone" ]]; then - sed -i "s/.*zk_cluster=.*/#--zk_cluster=127.0.0.1:2181/g" /work/openmldb/conf/nameserver.flags - sed -i "s/.*zk_root_path=.*/#--zk_root_path=\/openmldb/g" /work/openmldb/conf/nameserver.flags - sed -i "s/.*zk_cluster=.*/#--zk_cluster=127.0.0.1:2181/g" /work/openmldb/conf/tablet.flags - sed -i "s/.*zk_root_path=.*/#--zk_root_path=\/openmldb/g" /work/openmldb/conf/tablet.flags - sed -i "s/.*zk_cluster=.*/#--zk_cluster=127.0.0.1:2181/g" /work/openmldb/conf/apiserver.flags - sed -i "s/.*zk_root_path=.*/#--zk_root_path=\/openmldb/g" /work/openmldb/conf/apiserver.flags python3 convert_data.py < data/taxi_tour_table_train_simple.csv > ./data/taxi_tour.csv - cd /work/openmldb && sh bin/start-all.sh + cd /work/openmldb && ./bin/stop-standalone.sh && ./bin/start-standalone.sh sleep 1 else - sed -i "s/.*zk_cluster=.*/--zk_cluster=127.0.0.1:2181/g" /work/openmldb/conf/nameserver.flags - sed -i "s/.*zk_root_path=.*/--zk_root_path=\/openmldb/g" /work/openmldb/conf/nameserver.flags - sed -i "s/.*zk_cluster=.*/--zk_cluster=127.0.0.1:2181/g" /work/openmldb/conf/tablet.flags - sed -i "s/.*zk_root_path=.*/--zk_root_path=\/openmldb/g" /work/openmldb/conf/tablet.flags cd /work/zookeeper-3.4.14 && ./bin/zkServer.sh restart sleep 1 - cd /work/openmldb && ./bin/start.sh start tablet - sleep 1 - cd /work/openmldb && ./bin/start.sh start nameserver - sleep 1 + cd /work/openmldb && ./bin/stop-all.sh && ./bin/start-all.sh fi diff --git a/demo/predict-taxi-trip-duration-nb/script/predict_server.py b/demo/predict-taxi-trip-duration-nb/script/predict_server.py index a4d7501ea4b..a44e8746236 100644 --- a/demo/predict-taxi-trip-duration-nb/script/predict_server.py +++ b/demo/predict-taxi-trip-duration-nb/script/predict_server.py @@ -20,14 +20,11 @@ import json import lightgbm as lgb import sqlalchemy as db +import requests import argparse -sql = "" bst = None -engine = db.create_engine('openmldb:///db_test?zk=127.0.0.1:2181&zkPath=/openmldb') -connection = engine.connect() - table_schema = [ ("id", "string"), ("vendor_id", "int"), @@ -42,6 +39,8 @@ ("trip_duration", "int"), ] +url = "" + def get_schema(): dict_schema = {} for i in table_schema: @@ -64,15 +63,19 @@ class PredictHandler(tornado.web.RequestHandler): def post(self): row = json.loads(self.request.body) data = {} + data["input"] = [] + row_data = [] for i in table_schema: if i[1] == "string": - data[i[0]] = row.get(i[0], "") + row_data.append(row.get(i[0], "")) elif i[1] == "int" or i[1] == "double" or i[1] == "timestamp" or i[1] == "bigint": - data[i[0]] = row.get(i[0], 0) + row_data.append(row.get(i[0], 0)) else: - data[i[0]] = None - rs = connection.execute(sql, data) - for r in rs: + row_data.append(None) + data["input"].append(row_data) + rs = requests.post(url, json=data) + result = json.loads(rs.text) + for r in result["data"]["data"]: ins = build_feature(r) self.write("----------------ins---------------\n") self.write(str(ins) + "\n") @@ -93,11 +96,10 @@ def make_app(): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("sql_file", help="specify the sql file") + parser.add_argument("endpoint", help="specify the endpoint of apiserver") parser.add_argument("model_path", help="specify the model path") args = parser.parse_args() - with open(args.sql_file, "r") as fd: - sql = fd.read() + url = "http://%s/dbs/demo_db/deployments/demo" % args.endpoint bst = lgb.Booster(model_file=args.model_path) app = make_app() app.listen(8887) diff --git a/demo/predict-taxi-trip-duration-nb/script/predict_server_s.py b/demo/predict-taxi-trip-duration-nb/script/predict_server_s.py deleted file mode 100644 index ecf6eb21588..00000000000 --- a/demo/predict-taxi-trip-duration-nb/script/predict_server_s.py +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2021 4Paradigm -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import tornado.web -import tornado.ioloop -import json -import lightgbm as lgb -import sqlalchemy as db -import requests -import argparse - -bst = None - -table_schema = [ - ("id", "string"), - ("vendor_id", "int"), - ("pickup_datetime", "timestamp"), - ("dropoff_datetime", "timestamp"), - ("passenger_count", "int"), - ("pickup_longitude", "double"), - ("pickup_latitude", "double"), - ("dropoff_longitude", "double"), - ("dropoff_latitude", "double"), - ("store_and_fwd_flag", "string"), - ("trip_duration", "int"), -] - -url = "http://127.0.0.1:8080/dbs/demo_db/deployments/demo" - -def get_schema(): - dict_schema = {} - for i in table_schema: - dict_schema[i[0]] = i[1] - return dict_schema - -dict_schema = get_schema() -json_schema = json.dumps(dict_schema) - -def build_feature(rs): - var_Y = [rs[0]] - var_X = [rs[1:12]] - return np.array(var_X) - -class SchemaHandler(tornado.web.RequestHandler): - def get(self): - self.write(json_schema) - -class PredictHandler(tornado.web.RequestHandler): - def post(self): - row = json.loads(self.request.body) - data = {} - data["input"] = [] - row_data = [] - for i in table_schema: - if i[1] == "string": - row_data.append(row.get(i[0], "")) - elif i[1] == "int" or i[1] == "double" or i[1] == "timestamp" or i[1] == "bigint": - row_data.append(row.get(i[0], 0)) - else: - row_data.append(None) - data["input"].append(row_data) - rs = requests.post(url, json=data) - result = json.loads(rs.text) - for r in result["data"]["data"]: - ins = build_feature(r) - self.write("----------------ins---------------\n") - self.write(str(ins) + "\n") - duration = bst.predict(ins) - self.write("---------------predict trip_duration -------------\n") - self.write("%s s"%str(duration[0])) - -class MainHandler(tornado.web.RequestHandler): - def get(self): - self.write("real time execute sparksql demo") - -def make_app(): - return tornado.web.Application([ - (r"/", MainHandler), - (r"/schema", SchemaHandler), - (r"/predict", PredictHandler), - ]) - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("model_path", help="specify the model path") - args = parser.parse_args() - bst = lgb.Booster(model_file=args.model_path) - app = make_app() - app.listen(8887) - tornado.ioloop.IOLoop.current().start() diff --git a/demo/predict-taxi-trip-duration-nb/script/start_predict_server.sh b/demo/predict-taxi-trip-duration-nb/script/start_predict_server.sh index ec42372caa0..7c54ac93ca8 100755 --- a/demo/predict-taxi-trip-duration-nb/script/start_predict_server.sh +++ b/demo/predict-taxi-trip-duration-nb/script/start_predict_server.sh @@ -17,9 +17,5 @@ # start_predict_server.sh echo "start predict server" -if [ $# -eq 1 ]; then - nohup python3 predict_server_s.py "$1" >/tmp/p.log 2>&1 & -else - nohup python3 predict_server.py "$1" "$2" >/tmp/p.log 2>&1 & -fi +nohup python3 predict_server.py "$1" "$2" >/tmp/p.log 2>&1 & sleep 1 diff --git a/demo/predict-taxi-trip-duration-nb/script/train.py b/demo/predict-taxi-trip-duration-nb/script/train.py index e1edc9d1c5f..3952a8094fc 100644 --- a/demo/predict-taxi-trip-duration-nb/script/train.py +++ b/demo/predict-taxi-trip-duration-nb/script/train.py @@ -15,27 +15,42 @@ # limitations under the License. import lightgbm as lgb +import pandas as pd from sklearn.metrics import mean_squared_error from sklearn.model_selection import GridSearchCV -from pyspark.sql import SparkSession from sklearn.model_selection import train_test_split import argparse +import os parser = argparse.ArgumentParser() -parser.add_argument("sql_file", help="specify the sql file") -parser.add_argument("model_path", help="specify the model path") +parser.add_argument("feature_path", help="specify the feature path") +parser.add_argument("model_path", help="specify the model path") args = parser.parse_args() -with open(args.sql_file, "r") as fd: - sql = fd.read() +feature_path = args.feature_path +# merge file +if os.path.isdir(feature_path): + path_list = os.listdir(feature_path) + new_file = "/tmp/merged_feature.csv" + with open(new_file, 'w') as wf: + has_write_header = False + for filename in path_list: + if filename == "_SUCCESS" or filename.startswith('.'): + continue + with open(os.path.join(feature_path, filename), 'r') as f: + first_line = True + for line in f.readlines(): + if first_line is True: + first_line = False + if has_write_header is False: + has_write_header = True + else: + continue + wf.writelines(line) + feature_path = new_file # run batch sql and get instances -spark = SparkSession.builder.appName("OpenMLDB Demo").getOrCreate() -parquet_train = "file:////work/taxi-trip/data/taxi_tour_table_train_simple.snappy.parquet" -train = spark.read.parquet(parquet_train) -train.createOrReplaceTempView("t1") -train_df = spark.sql(sql) -df = train_df.toPandas() +df = pd.read_csv(feature_path); train_set, predict_set = train_test_split(df, test_size=0.2) y_train = train_set['trip_duration'] x_train = train_set.drop(columns=['trip_duration']) diff --git a/demo/predict-taxi-trip-duration-nb/script/train_s.py b/demo/predict-taxi-trip-duration-nb/script/train_s.py deleted file mode 100644 index 3a6bf819f0e..00000000000 --- a/demo/predict-taxi-trip-duration-nb/script/train_s.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright 2021 4Paradigm -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import lightgbm as lgb -import pandas as pd -from sklearn.metrics import mean_squared_error -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import train_test_split -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument("feature_path", help="specify the feature path") -parser.add_argument("model_path", help="specify the model path") -args = parser.parse_args() - -# run batch sql and get instances -df = pd.read_csv(args.feature_path); -train_set, predict_set = train_test_split(df, test_size=0.2) -y_train = train_set['trip_duration'] -x_train = train_set.drop(columns=['trip_duration']) -y_predict = predict_set['trip_duration'] -x_predict = predict_set.drop(columns=['trip_duration']) - - -# training model with regression -print('Starting training...') -lgb_train = lgb.Dataset(x_train, y_train) -lgb_eval = lgb.Dataset(x_predict, y_predict, reference=lgb_train) - -# specify your configurations as a dict -params = { - 'boosting_type': 'gbdt', - 'objective': 'regression', - 'metric': {'l2', 'l1'}, - 'num_leaves': 31, - 'learning_rate': 0.05, - 'feature_fraction': 0.9, - 'bagging_fraction': 0.8, - 'bagging_freq': 5, - 'verbose': 0 -} - -gbm = lgb.train(params, - lgb_train, - num_boost_round=20, - valid_sets=lgb_eval, - early_stopping_rounds=5) - -gbm.save_model(args.model_path) -print("save model.txt done") diff --git a/demo/standalone/data/data.csv b/demo/quick_start/data/data.csv similarity index 100% rename from demo/standalone/data/data.csv rename to demo/quick_start/data/data.csv diff --git a/docs/cn/standalone.md b/docs/cn/standalone.md index 6bc235b47c5..65a9d62f610 100644 --- a/docs/cn/standalone.md +++ b/docs/cn/standalone.md @@ -11,7 +11,7 @@ 1. 拉取镜像(镜像下载大小大约 500 MB,解压后约 1.3 GB)和启动 docker 容器 ```bash - docker run -it 4pdosc/openmldb:0.3.2 bash + docker run -it 4pdosc/openmldb:0.4.0 bash ``` :bulb: **成功启动容器以后,以下命令均在容器内执行。** @@ -19,7 +19,7 @@ 2. 下载样例数据 ```bash - curl https://raw.githubusercontent.com/4paradigm/OpenMLDB/main/demo/standalone/data/data.csv --output ./data/data.csv + curl https://raw.githubusercontent.com/4paradigm/OpenMLDB/main/demo/quick_start/data/data.csv --output ./data/data.csv ``` 3. 启动 OpenMLDB 服务和 CLI diff --git a/docs/en/standalone.md b/docs/en/standalone.md index 0b84703b2be..740d3337ede 100644 --- a/docs/en/standalone.md +++ b/docs/en/standalone.md @@ -11,7 +11,7 @@ We first need to download the sample data set and start the OpenMLDB CLI. We str 1. Pull the image (download size around 500 MB) and start the container ```bash - docker run -it 4pdosc/openmldb:0.3.2 bash + docker run -it 4pdosc/openmldb:0.4.0 bash ``` **:bulb: After starting the container successfully, the following commands are all executed in the container.** @@ -19,7 +19,7 @@ We first need to download the sample data set and start the OpenMLDB CLI. We str 2. Download the sample data ```bash - curl https://raw.githubusercontent.com/4paradigm/OpenMLDB/main/demo/standalone/data/data.csv --output ./data/data.csv + curl https://raw.githubusercontent.com/4paradigm/OpenMLDB/main/demo/quick_start/data/data.csv --output ./data/data.csv ``` 3. Start the OpenMLDB service and CLI diff --git a/release/bin/start.sh b/release/bin/start.sh index e993a7ecf62..0b5f9f34671 100755 --- a/release/bin/start.sh +++ b/release/bin/start.sh @@ -63,8 +63,8 @@ case $OP in start) echo "Starting $COMPONENT ... " if [ -f "$OPENMLDB_PID_FILE" ]; then - if kill -0 "$(cat "$OPENMLDB_PID_FILE")" > /dev/null 2>&1; then - echo tablet already running as process "$(cat "$OPENMLDB_PID_FILE")". + if tr -d '\0' < "$OPENMLDB_PID_FILE" | xargs kill -0 > /dev/null 2>&1; then + echo tablet already running as process "$(tr -d '\0' < "$OPENMLDB_PID_FILE")". exit 0 fi fi @@ -83,7 +83,7 @@ case $OP in then echo "no $COMPONENT to stop (could not find file $OPENMLDB_PID_FILE)" else - kill "$(cat "$OPENMLDB_PID_FILE")" + tr -d '\0' < "$OPENMLDB_PID_FILE" | xargs kill rm "$OPENMLDB_PID_FILE" echo STOPPED fi