From 1e643c58b58f93a62eb8f5b79e3b92e68b0f62f1 Mon Sep 17 00:00:00 2001
From: dl239 <jeremydeng012@gmail.com>
Date: Thu, 13 Jan 2022 02:22:25 -0600
Subject: [PATCH] feat: update demo (#1031)

---
 demo/README.md                                | 115 +++++++++++++++---
 demo/predict-taxi-trip-duration-nb/Dockerfile |   3 +-
 .../script/import.py                          |  67 ----------
 .../script/init.sh                            |  19 +--
 .../script/predict_server.py                  |  26 ++--
 .../script/predict_server_s.py                | 104 ----------------
 .../script/start_predict_server.sh            |   6 +-
 .../script/train.py                           |  37 ++++--
 .../script/train_s.py                         |  63 ----------
 .../{standalone => quick_start}/data/data.csv |   0
 docs/cn/standalone.md                         |   4 +-
 docs/en/standalone.md                         |   4 +-
 release/bin/start.sh                          |   6 +-
 13 files changed, 148 insertions(+), 306 deletions(-)
 delete mode 100644 demo/predict-taxi-trip-duration-nb/script/import.py
 delete mode 100644 demo/predict-taxi-trip-duration-nb/script/predict_server_s.py
 delete mode 100644 demo/predict-taxi-trip-duration-nb/script/train_s.py
 rename demo/{standalone => quick_start}/data/data.csv (100%)

diff --git a/demo/README.md b/demo/README.md
index f7bc436b307..f572dd80e0d 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -24,25 +24,101 @@ w2 as (PARTITION BY passenger_count ORDER BY pickup_datetime ROWS_RANGE BETWEEN
 ```
 
 ## 2. Demo with The Cluster Mode
-
 > :warning: Required docker engine version >= 18.03
 
+**Start docker**
+```
+docker run -it 4pdosc/openmldb:0.4.0 bash
+```
+**Initialize environment**
 ```bash
-# Pull the docker and start it
-docker run -it 4pdosc/openmldb:0.3.2 bash
-
-# Initilize the environment
 ./init.sh
+```
+**Create table**
+```bash
+# Start the OpenMLDB CLI for the cluster mode
+../openmldb/bin/openmldb --zk_cluster=127.0.0.1:2181 --zk_root_path=/openmldb --role=sql_client
+```
+```sql
+# The below commands are executed in the CLI
+> CREATE DATABASE demo_db;
+> USE demo_db;
+> CREATE TABLE t1(id string, vendor_id int, pickup_datetime timestamp, dropoff_datetime timestamp, passenger_count int, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, store_and_fwd_flag string, trip_duration int);
+```
 
-# Run feature extraction and model training. Feature extraction will read offline data from the local file
-python3 train.py ./fe.sql /tmp/model.txt
-
-# Import the data to online database
-python3 import.py
+**Import offline data to OpenMLDB**
+```sql
+# The below commands are executed in the CLI
+> USE demo_db;
+> SET @@execute_mode='offline';
+> LOAD DATA INFILE '/work/taxi-trip/data/taxi_tour_table_train_simple.snappy.parquet' INTO TABLE t1 options(format='parquet', header=true, mode='append');
+# You can check the job status with the command below
+> show jobs;
+```
+**Run offline feature extraction**
+```sql
+# The below commands are executed in the CLI
+> USE demo_db;
+> SET @@execute_mode='offline';
+> SELECT trip_duration, passenger_count,
+sum(pickup_latitude) OVER w AS vendor_sum_pl,
+max(pickup_latitude) OVER w AS vendor_max_pl,
+min(pickup_latitude) OVER w AS vendor_min_pl,
+avg(pickup_latitude) OVER w AS vendor_avg_pl,
+sum(pickup_latitude) OVER w2 AS pc_sum_pl,
+max(pickup_latitude) OVER w2 AS pc_max_pl,
+min(pickup_latitude) OVER w2 AS pc_min_pl,
+avg(pickup_latitude) OVER w2 AS pc_avg_pl,
+count(vendor_id) OVER w2 AS pc_cnt,
+count(vendor_id) OVER w AS vendor_cnt
+FROM t1
+WINDOW w AS (PARTITION BY vendor_id ORDER BY pickup_datetime ROWS_RANGE BETWEEN 1d PRECEDING AND CURRENT ROW),
+w2 AS (PARTITION BY passenger_count ORDER BY pickup_datetime ROWS_RANGE BETWEEN 1d PRECEDING AND CURRENT ROW) INTO OUTFILE '/tmp/feature_data';
+```
+**Train model**
+```bash
+python3 train.py /tmp/feature_data /tmp/model.txt
+```
+**Online SQL deployment**
+```sql
+# The below commands are executed in the CLI
+> USE demo_db;
+> SET @@execute_mode='online';
+> DEPLOY demo SELECT trip_duration, passenger_count,
+sum(pickup_latitude) OVER w AS vendor_sum_pl,
+max(pickup_latitude) OVER w AS vendor_max_pl,
+min(pickup_latitude) OVER w AS vendor_min_pl,
+avg(pickup_latitude) OVER w AS vendor_avg_pl,
+sum(pickup_latitude) OVER w2 AS pc_sum_pl,
+max(pickup_latitude) OVER w2 AS pc_max_pl,
+min(pickup_latitude) OVER w2 AS pc_min_pl,
+avg(pickup_latitude) OVER w2 AS pc_avg_pl,
+count(vendor_id) OVER w2 AS pc_cnt,
+count(vendor_id) OVER w AS vendor_cnt
+FROM t1
+WINDOW w AS (PARTITION BY vendor_id ORDER BY pickup_datetime ROWS_RANGE BETWEEN 1d PRECEDING AND CURRENT ROW),
+w2 AS (PARTITION BY passenger_count ORDER BY pickup_datetime ROWS_RANGE BETWEEN 1d PRECEDING AND CURRENT ROW);
+```
+:bulb: Note that:
 
-# Start the HTTP service for inference with OpenMLDB
-./start_predict_server.sh ./fe.sql /tmp/model.txt
+- The SQL used for the online deployment should be the same as that for offline feature extraction.
+- Do not insert or import online data into the referenced tables before running `DEPLOY`.
 
+**Import online data to OpenMLDB**
+```sql
+# The below commands are executed in the CLI
+> USE demo_db;
+> SET @@execute_mode='online';
+> LOAD DATA INFILE 'file:///work/taxi-trip/data/taxi_tour_table_train_simple.csv' INTO TABLE t1 options(format='csv', header=true, mode='append');
+# You can check the job status with the command below
+> show jobs;
+```
+**Start HTTP service for inference with OpenMLDB**
+```bash
+./start_predict_server.sh 127.0.0.1:9080 /tmp/model.txt
+```
+**Run inference with HTTP request**
+```bash
 # Run inference with a HTTP request
 python3 predict.py
 # The following output is expected (the numbers might be slightly different)
@@ -52,7 +128,7 @@ python3 predict.py
 ---------------predict trip_duration -------------
 848.014745715936 s
 ```
-:bulb: To read more details about the cluster mode, please refer to the [QuickStart (Cluster Mode)](https://github.com/4paradigm/OpenMLDB/blob/main/docs/en/cluster.md)
+:bulb: To read more details about the cluster mode, please refer to the [QuickStart (Cluster Mode)](https://docs.openmldb.ai/content-1/openmldb_quickstart)
 
 ## 3. Demo with The Standalone Mode
 
@@ -61,14 +137,14 @@ python3 predict.py
 **Start docker**
 
 ```bash
-docker run -it 4pdosc/openmldb:0.3.2 bash
+docker run -it 4pdosc/openmldb:0.4.0 bash
 ```
 **Initialize environment**
 
 ```bash
 ./init.sh standalone
 ```
-**Create table and import the data to OpenMLDB.**
+**Create table and import the data to OpenMLDB**
 
 ```bash
 # Start the OpenMLDB CLI for the standalone mode
@@ -78,14 +154,13 @@ docker run -it 4pdosc/openmldb:0.3.2 bash
 # The below commands are executed in the CLI
 > CREATE DATABASE demo_db;
 > USE demo_db;
-> CREATE TABLE t1(id string, vendor_id int, pickup_datetime timestamp, dropoff_datetime timestamp, passenger_count int, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, store_and_fwd_flag string, trip_duration int, INDEX(ts=pickup_datetime));
+> CREATE TABLE t1(id string, vendor_id int, pickup_datetime timestamp, dropoff_datetime timestamp, passenger_count int, pickup_longitude double, pickup_latitude double, dropoff_longitude double, dropoff_latitude double, store_and_fwd_flag string, trip_duration int);
 > LOAD DATA INFILE './data/taxi_tour.csv' INTO TABLE t1;
 ```
 **Run offline feature extraction**
 
 ```sql
 # The below commands are executed in the CLI
-> SET PERFORMANCE_SENSITIVE = false;
 > SELECT trip_duration, passenger_count,
 sum(pickup_latitude) OVER w AS vendor_sum_pl,
 max(pickup_latitude) OVER w AS vendor_max_pl,
@@ -105,7 +180,7 @@ w2 AS (PARTITION BY passenger_count ORDER BY pickup_datetime ROWS_RANGE BETWEEN
 **Train model**
 
 ```bash
-python3 train_s.py /tmp/feature.csv /tmp/model.txt
+python3 train.py /tmp/feature.csv /tmp/model.txt
 ```
 **Online SQL deployment**
 
@@ -140,7 +215,7 @@ w2 AS (PARTITION BY passenger_count ORDER BY pickup_datetime ROWS_RANGE BETWEEN
 **Start HTTP service for inference with OpenMLDB**
 
 ```
-./start_predict_server.sh /tmp/model.txt
+./start_predict_server.sh 127.0.0.1:8080 /tmp/model.txt
 ```
 
 **Run inference with HTTP request**
@@ -155,5 +230,5 @@ python3 predict.py
 880.3688347542294 s
 ```
 
-:bulb: To read more details about the standalone mode, please refer to the [QuickStart (Standalone Mode)](https://github.com/4paradigm/OpenMLDB/blob/main/docs/en/standalone.md)
+:bulb: To read more details about the standalone mode, please refer to the [QuickStart (Standalone Mode)](https://docs.openmldb.ai/content-1/openmldb_quickstart)
 
diff --git a/demo/predict-taxi-trip-duration-nb/Dockerfile b/demo/predict-taxi-trip-duration-nb/Dockerfile
index ef221ada3bd..522639da221 100644
--- a/demo/predict-taxi-trip-duration-nb/Dockerfile
+++ b/demo/predict-taxi-trip-duration-nb/Dockerfile
@@ -3,13 +3,14 @@ FROM openjdk:11.0.13-jre-slim-bullseye
 LABEL org.opencontainers.image.source https://github.com/4paradigm/OpenMLDB
 
 RUN  apt-get update \
-     && apt-get install -y --no-install-recommends libgomp1 curl binutils procps python3 python3-pip python3-numpy \
+     && apt-get install -y --no-install-recommends libgomp1 curl binutils procps python3 python3-pip python3-numpy vim \
      && rm -rf /var/lib/apt/lists/* \
      && pip install --no-cache-dir py4j==0.10.9 numpy lightgbm tornado requests pandas openmldb
 
 COPY script /work/taxi-trip/
 
 ENV LANG=en_US.UTF-8
+ENV SPARK_HOME=/work/openmldb/spark-3.0.0-bin-openmldbspark
 ARG OPENMLDB_VERSION=0.3.0
 
 COPY setup_openmldb.sh /
diff --git a/demo/predict-taxi-trip-duration-nb/script/import.py b/demo/predict-taxi-trip-duration-nb/script/import.py
deleted file mode 100644
index e88a4081a03..00000000000
--- a/demo/predict-taxi-trip-duration-nb/script/import.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-# Copyright 2021 4Paradigm
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-"""
-import sqlalchemy as db
-
-
-import sys
-import datetime
-
-ddl="""
-create table t1(
-id string,
-vendor_id int,
-pickup_datetime timestamp,
-dropoff_datetime timestamp,
-passenger_count int,
-pickup_longitude double,
-pickup_latitude double,
-dropoff_longitude double,
-dropoff_latitude double,
-store_and_fwd_flag string,
-trip_duration int,
-index(key=vendor_id, ts=pickup_datetime),
-index(key=passenger_count, ts=pickup_datetime)
-);
-"""
-engine = db.create_engine('openmldb:///db_test?zk=127.0.0.1:2181&zkPath=/openmldb')
-connection = engine.connect()
-try:
-    connection.execute("create database db_test;");
-except Exception as e:
-    print(e)
-try:
-    connection.execute(ddl);
-except Exception as e:
-    print(e)
-
-def insert_row(line):
-    row = line.split(',')
-    row[2] = '%dl'%int(datetime.datetime.strptime(row[2], '%Y-%m-%d %H:%M:%S').timestamp() * 1000)
-    row[3] = '%dl'%int(datetime.datetime.strptime(row[3], '%Y-%m-%d %H:%M:%S').timestamp() * 1000)
-    insert = "insert into t1 values('%s', %s, %s, %s, %s, %s, %s, %s, %s, '%s', %s);"% tuple(row)
-    connection.execute(insert)
-
-with open('data/taxi_tour_table_train_simple.csv', 'r') as fd:
-    idx = 0
-    for line in fd:
-        if idx == 0:
-            idx = idx + 1
-            continue
-        insert_row(line.replace('\n', ''))
-        idx = idx + 1
diff --git a/demo/predict-taxi-trip-duration-nb/script/init.sh b/demo/predict-taxi-trip-duration-nb/script/init.sh
index 5f94d1ca6f4..45d4df52827 100755
--- a/demo/predict-taxi-trip-duration-nb/script/init.sh
+++ b/demo/predict-taxi-trip-duration-nb/script/init.sh
@@ -20,29 +20,16 @@ MODE="cluster"
 if [ $# -gt 0 ]; then
     MODE=$1
 fi
-pkill mon
 pkill python3
 rm -rf /tmp/*
+cd /work/openmldb && rm -rf logs* && rm -rf db*
 sleep 2
 if [[ "$MODE" = "standalone" ]]; then
-    sed -i "s/.*zk_cluster=.*/#--zk_cluster=127.0.0.1:2181/g" /work/openmldb/conf/nameserver.flags
-    sed -i "s/.*zk_root_path=.*/#--zk_root_path=\/openmldb/g" /work/openmldb/conf/nameserver.flags
-    sed -i "s/.*zk_cluster=.*/#--zk_cluster=127.0.0.1:2181/g" /work/openmldb/conf/tablet.flags
-    sed -i "s/.*zk_root_path=.*/#--zk_root_path=\/openmldb/g" /work/openmldb/conf/tablet.flags
-    sed -i "s/.*zk_cluster=.*/#--zk_cluster=127.0.0.1:2181/g" /work/openmldb/conf/apiserver.flags
-    sed -i "s/.*zk_root_path=.*/#--zk_root_path=\/openmldb/g" /work/openmldb/conf/apiserver.flags
     python3 convert_data.py < data/taxi_tour_table_train_simple.csv  > ./data/taxi_tour.csv
-    cd /work/openmldb && sh bin/start-all.sh
+    cd /work/openmldb && ./bin/stop-standalone.sh && ./bin/start-standalone.sh
     sleep 1
 else
-    sed -i "s/.*zk_cluster=.*/--zk_cluster=127.0.0.1:2181/g" /work/openmldb/conf/nameserver.flags
-    sed -i "s/.*zk_root_path=.*/--zk_root_path=\/openmldb/g" /work/openmldb/conf/nameserver.flags
-    sed -i "s/.*zk_cluster=.*/--zk_cluster=127.0.0.1:2181/g" /work/openmldb/conf/tablet.flags
-    sed -i "s/.*zk_root_path=.*/--zk_root_path=\/openmldb/g" /work/openmldb/conf/tablet.flags
     cd /work/zookeeper-3.4.14 && ./bin/zkServer.sh restart
     sleep 1
-    cd /work/openmldb && ./bin/start.sh start tablet
-    sleep 1
-    cd /work/openmldb && ./bin/start.sh start nameserver
-    sleep 1
+    cd /work/openmldb && ./bin/stop-all.sh && ./bin/start-all.sh
 fi
diff --git a/demo/predict-taxi-trip-duration-nb/script/predict_server.py b/demo/predict-taxi-trip-duration-nb/script/predict_server.py
index a4d7501ea4b..a44e8746236 100644
--- a/demo/predict-taxi-trip-duration-nb/script/predict_server.py
+++ b/demo/predict-taxi-trip-duration-nb/script/predict_server.py
@@ -20,14 +20,11 @@
 import json
 import lightgbm as lgb
 import sqlalchemy as db
+import requests
 import argparse
 
-sql = ""
 bst = None
 
-engine = db.create_engine('openmldb:///db_test?zk=127.0.0.1:2181&zkPath=/openmldb')
-connection = engine.connect()
-
 table_schema = [
 	("id", "string"),
 	("vendor_id", "int"),
@@ -42,6 +39,8 @@
 	("trip_duration", "int"),
 ]
 
+url = ""
+
 def get_schema():
     dict_schema = {}
     for i in table_schema:
@@ -64,15 +63,19 @@ class PredictHandler(tornado.web.RequestHandler):
     def post(self):
         row = json.loads(self.request.body)
         data = {}
+        data["input"] = []
+        row_data = []
         for i in table_schema:
             if i[1] == "string":
-                data[i[0]] = row.get(i[0], "")
+                row_data.append(row.get(i[0], ""))
             elif i[1] == "int" or i[1] == "double" or i[1] == "timestamp" or i[1] == "bigint":
-                data[i[0]] = row.get(i[0], 0)
+                row_data.append(row.get(i[0], 0))
             else:
-                data[i[0]] = None
-        rs = connection.execute(sql, data)
-        for r in rs:
+                row_data.append(None)
+        data["input"].append(row_data)       
+        rs = requests.post(url, json=data)
+        result = json.loads(rs.text)
+        for r in result["data"]["data"]:
             ins = build_feature(r)
             self.write("----------------ins---------------\n")
             self.write(str(ins) + "\n")
@@ -93,11 +96,10 @@ def make_app():
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("sql_file", help="specify the sql file")
+    parser.add_argument("endpoint",  help="specify the endpoint of apiserver")
     parser.add_argument("model_path",  help="specify the model path")
     args = parser.parse_args()
-    with open(args.sql_file, "r") as fd:
-      sql = fd.read()
+    url = "http://%s/dbs/demo_db/deployments/demo" % args.endpoint
     bst = lgb.Booster(model_file=args.model_path)
     app = make_app()
     app.listen(8887)
diff --git a/demo/predict-taxi-trip-duration-nb/script/predict_server_s.py b/demo/predict-taxi-trip-duration-nb/script/predict_server_s.py
deleted file mode 100644
index ecf6eb21588..00000000000
--- a/demo/predict-taxi-trip-duration-nb/script/predict_server_s.py
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-# Copyright 2021 4Paradigm
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import tornado.web
-import tornado.ioloop
-import json
-import lightgbm as lgb
-import sqlalchemy as db
-import requests
-import argparse
-
-bst = None
-
-table_schema = [
-	("id", "string"),
-	("vendor_id", "int"),
-	("pickup_datetime", "timestamp"),
-	("dropoff_datetime", "timestamp"),
-	("passenger_count", "int"),
-	("pickup_longitude", "double"),
-	("pickup_latitude", "double"),
-	("dropoff_longitude", "double"),
-	("dropoff_latitude", "double"),
-	("store_and_fwd_flag", "string"),
-	("trip_duration", "int"),
-]
-
-url = "http://127.0.0.1:8080/dbs/demo_db/deployments/demo"
-
-def get_schema():
-    dict_schema = {}
-    for i in table_schema:
-        dict_schema[i[0]] = i[1]
-    return dict_schema
-
-dict_schema = get_schema()
-json_schema = json.dumps(dict_schema)
-
-def build_feature(rs):
-    var_Y = [rs[0]]
-    var_X = [rs[1:12]]
-    return np.array(var_X)
-
-class SchemaHandler(tornado.web.RequestHandler):
-    def get(self):
-        self.write(json_schema)
-
-class PredictHandler(tornado.web.RequestHandler):
-    def post(self):
-        row = json.loads(self.request.body)
-        data = {}
-        data["input"] = []
-        row_data = []
-        for i in table_schema:
-            if i[1] == "string":
-                row_data.append(row.get(i[0], ""))
-            elif i[1] == "int" or i[1] == "double" or i[1] == "timestamp" or i[1] == "bigint":
-                row_data.append(row.get(i[0], 0))
-            else:
-                row_data.append(None)
-        data["input"].append(row_data)       
-        rs = requests.post(url, json=data)
-        result = json.loads(rs.text)
-        for r in result["data"]["data"]:
-            ins = build_feature(r)
-            self.write("----------------ins---------------\n")
-            self.write(str(ins) + "\n")
-            duration = bst.predict(ins)
-            self.write("---------------predict trip_duration -------------\n")
-            self.write("%s s"%str(duration[0]))
-
-class MainHandler(tornado.web.RequestHandler):
-    def get(self):
-        self.write("real time execute sparksql demo")
-
-def make_app():
-    return tornado.web.Application([
-        (r"/", MainHandler),
-        (r"/schema", SchemaHandler),
-        (r"/predict", PredictHandler),
-    ])
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("model_path",  help="specify the model path")
-    args = parser.parse_args()
-    bst = lgb.Booster(model_file=args.model_path)
-    app = make_app()
-    app.listen(8887)
-    tornado.ioloop.IOLoop.current().start()
diff --git a/demo/predict-taxi-trip-duration-nb/script/start_predict_server.sh b/demo/predict-taxi-trip-duration-nb/script/start_predict_server.sh
index ec42372caa0..7c54ac93ca8 100755
--- a/demo/predict-taxi-trip-duration-nb/script/start_predict_server.sh
+++ b/demo/predict-taxi-trip-duration-nb/script/start_predict_server.sh
@@ -17,9 +17,5 @@
 # start_predict_server.sh
 
 echo "start predict server"
-if [ $# -eq 1 ]; then
-    nohup python3 predict_server_s.py "$1" >/tmp/p.log 2>&1 &
-else
-    nohup python3 predict_server.py "$1" "$2" >/tmp/p.log 2>&1 &
-fi
+nohup python3 predict_server.py "$1" "$2" >/tmp/p.log 2>&1 &
 sleep 1
diff --git a/demo/predict-taxi-trip-duration-nb/script/train.py b/demo/predict-taxi-trip-duration-nb/script/train.py
index e1edc9d1c5f..3952a8094fc 100644
--- a/demo/predict-taxi-trip-duration-nb/script/train.py
+++ b/demo/predict-taxi-trip-duration-nb/script/train.py
@@ -15,27 +15,42 @@
 # limitations under the License.
 
 import lightgbm as lgb
+import pandas as pd
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import GridSearchCV
-from pyspark.sql import SparkSession
 from sklearn.model_selection import train_test_split
 import argparse
+import os
 
 parser = argparse.ArgumentParser()
-parser.add_argument("sql_file", help="specify the sql file")
-parser.add_argument("model_path",  help="specify the model path")
+parser.add_argument("feature_path", help="specify the feature path")
+parser.add_argument("model_path", help="specify the model path")
 args = parser.parse_args()
 
-with open(args.sql_file, "r") as fd:
-    sql = fd.read()
+feature_path = args.feature_path
+# merge file
+if os.path.isdir(feature_path):
+    path_list = os.listdir(feature_path)
+    new_file = "/tmp/merged_feature.csv"
+    with open(new_file, 'w') as wf:
+        has_write_header = False
+        for filename in path_list:
+            if filename == "_SUCCESS" or filename.startswith('.'):
+                continue
+            with open(os.path.join(feature_path, filename), 'r') as f:
+                first_line = True
+                for line in f.readlines():
+                    if first_line is True:
+                        first_line = False
+                        if has_write_header is False:
+                            has_write_header = True
+                        else:
+                            continue
+                    wf.writelines(line)
+    feature_path = new_file
 
 # run batch sql and get instances
-spark = SparkSession.builder.appName("OpenMLDB Demo").getOrCreate()
-parquet_train = "file:////work/taxi-trip/data/taxi_tour_table_train_simple.snappy.parquet"
-train = spark.read.parquet(parquet_train)
-train.createOrReplaceTempView("t1")
-train_df = spark.sql(sql)
-df = train_df.toPandas()
+df = pd.read_csv(feature_path);
 train_set, predict_set = train_test_split(df, test_size=0.2)
 y_train = train_set['trip_duration']
 x_train = train_set.drop(columns=['trip_duration'])
diff --git a/demo/predict-taxi-trip-duration-nb/script/train_s.py b/demo/predict-taxi-trip-duration-nb/script/train_s.py
deleted file mode 100644
index 3a6bf819f0e..00000000000
--- a/demo/predict-taxi-trip-duration-nb/script/train_s.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-# Copyright 2021 4Paradigm
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import lightgbm as lgb
-import pandas as pd
-from sklearn.metrics import mean_squared_error
-from sklearn.model_selection import GridSearchCV
-from sklearn.model_selection import train_test_split
-import argparse
-
-parser = argparse.ArgumentParser()
-parser.add_argument("feature_path", help="specify the feature path")
-parser.add_argument("model_path", help="specify the model path")
-args = parser.parse_args()
-
-# run batch sql and get instances
-df = pd.read_csv(args.feature_path);
-train_set, predict_set = train_test_split(df, test_size=0.2)
-y_train = train_set['trip_duration']
-x_train = train_set.drop(columns=['trip_duration'])
-y_predict = predict_set['trip_duration']
-x_predict = predict_set.drop(columns=['trip_duration'])
-
-
-# training model with regression
-print('Starting training...')
-lgb_train = lgb.Dataset(x_train, y_train)
-lgb_eval = lgb.Dataset(x_predict, y_predict, reference=lgb_train)
-
-# specify your configurations as a dict
-params = {
-    'boosting_type': 'gbdt',
-    'objective': 'regression',
-    'metric': {'l2', 'l1'},
-    'num_leaves': 31,
-    'learning_rate': 0.05,
-    'feature_fraction': 0.9,
-    'bagging_fraction': 0.8,
-    'bagging_freq': 5,
-    'verbose': 0
-}
-
-gbm = lgb.train(params,
-                lgb_train,
-                num_boost_round=20,
-                valid_sets=lgb_eval,
-                early_stopping_rounds=5)
-
-gbm.save_model(args.model_path)
-print("save model.txt done")
diff --git a/demo/standalone/data/data.csv b/demo/quick_start/data/data.csv
similarity index 100%
rename from demo/standalone/data/data.csv
rename to demo/quick_start/data/data.csv
diff --git a/docs/cn/standalone.md b/docs/cn/standalone.md
index 6bc235b47c5..65a9d62f610 100644
--- a/docs/cn/standalone.md
+++ b/docs/cn/standalone.md
@@ -11,7 +11,7 @@
 1. 拉取镜像（镜像下载大小大约 500 MB，解压后约 1.3 GB）和启动 docker 容器
 
    ```bash
-   docker run -it 4pdosc/openmldb:0.3.2 bash
+   docker run -it 4pdosc/openmldb:0.4.0 bash
    ```
 
    :bulb: **成功启动容器以后，以下命令均在容器内执行。**
@@ -19,7 +19,7 @@
 2. 下载样例数据
 
    ```bash
-   curl https://raw.githubusercontent.com/4paradigm/OpenMLDB/main/demo/standalone/data/data.csv --output ./data/data.csv
+   curl https://raw.githubusercontent.com/4paradigm/OpenMLDB/main/demo/quick_start/data/data.csv --output ./data/data.csv
    ```
 
 3. 启动 OpenMLDB 服务和 CLI
diff --git a/docs/en/standalone.md b/docs/en/standalone.md
index 0b84703b2be..740d3337ede 100644
--- a/docs/en/standalone.md
+++ b/docs/en/standalone.md
@@ -11,7 +11,7 @@ We first need to download the sample data set and start the OpenMLDB CLI. We str
 1. Pull the image (download size around 500 MB) and start the container 
 
    ```bash
-   docker run -it 4pdosc/openmldb:0.3.2 bash
+   docker run -it 4pdosc/openmldb:0.4.0 bash
    ```
 
    **:bulb: After starting the container successfully, the following commands are all executed in the container.**
@@ -19,7 +19,7 @@ We first need to download the sample data set and start the OpenMLDB CLI. We str
 2. Download the sample data
 
    ```bash
-   curl https://raw.githubusercontent.com/4paradigm/OpenMLDB/main/demo/standalone/data/data.csv --output ./data/data.csv
+   curl https://raw.githubusercontent.com/4paradigm/OpenMLDB/main/demo/quick_start/data/data.csv --output ./data/data.csv
    ```
 
 3. Start the OpenMLDB service and CLI
diff --git a/release/bin/start.sh b/release/bin/start.sh
index e993a7ecf62..0b5f9f34671 100755
--- a/release/bin/start.sh
+++ b/release/bin/start.sh
@@ -63,8 +63,8 @@ case $OP in
     start)
         echo "Starting $COMPONENT ... "
         if [ -f "$OPENMLDB_PID_FILE" ]; then
-            if kill -0 "$(cat "$OPENMLDB_PID_FILE")" > /dev/null 2>&1; then
-                echo tablet already running as process "$(cat "$OPENMLDB_PID_FILE")".
+            if tr -d '\0' < "$OPENMLDB_PID_FILE" | xargs kill -0 > /dev/null 2>&1; then
+                echo tablet already running as process "$(tr -d '\0' < "$OPENMLDB_PID_FILE")".
                 exit 0
             fi
         fi
@@ -83,7 +83,7 @@ case $OP in
         then
              echo "no $COMPONENT to stop (could not find file $OPENMLDB_PID_FILE)"
         else
-            kill "$(cat "$OPENMLDB_PID_FILE")"
+            tr -d '\0' < "$OPENMLDB_PID_FILE" | xargs kill
             rm "$OPENMLDB_PID_FILE"
             echo STOPPED
         fi