Skip to content

Commit

Permalink
Updated scripts for auditing releases
Browse files Browse the repository at this point in the history
- Added script to automatically generate change list CHANGES.txt
- Added test for verifying linking against maven distributions of `spark-sql` and `spark-hive`
- Added SBT projects for testing functionality of `spark-sql` and `spark-hive`
- Fixed issues in existing tests that might have come up because of changes in Spark 1.0

Author: Tathagata Das <tathagata.das1565@gmail.com>

Closes mesos#844 from tdas/update-dev-scripts and squashes the following commits:

25090ba [Tathagata Das] Added missing license
e2e20b3 [Tathagata Das] Updated tests for auditing releases.
  • Loading branch information
tdas committed May 23, 2014
1 parent cce7745 commit b2bdd0e
Show file tree
Hide file tree
Showing 11 changed files with 547 additions and 6 deletions.
11 changes: 7 additions & 4 deletions dev/audit-release/audit_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,12 @@ def get_url(url):
# For each of these modules, we'll test an 'empty' application in sbt and
# maven that links against them. This will catch issues with messed up
# dependencies within those projects.
modules = ["spark-core", "spark-bagel", "spark-mllib", "spark-streaming", "spark-repl",
"spark-graphx", "spark-streaming-flume", "spark-streaming-kafka",
"spark-streaming-mqtt", "spark-streaming-twitter", "spark-streaming-zeromq"]
modules = [
"spark-core", "spark-bagel", "spark-mllib", "spark-streaming", "spark-repl",
"spark-graphx", "spark-streaming-flume", "spark-streaming-kafka",
"spark-streaming-mqtt", "spark-streaming-twitter", "spark-streaming-zeromq",
"spark-catalyst", "spark-sql", "spark-hive"
]
modules = map(lambda m: "%s_%s" % (m, SCALA_BINARY_VERSION), modules)

# Check for directories that might interfere with tests
Expand All @@ -122,7 +125,7 @@ def ensure_path_not_present(x):
os.chdir(original_dir)

# SBT application tests
for app in ["sbt_app_core", "sbt_app_graphx", "sbt_app_streaming"]:
for app in ["sbt_app_core", "sbt_app_graphx", "sbt_app_streaming", "sbt_app_sql", "sbt_app_hive"]:
os.chdir(app)
ret = run_cmd("sbt clean run", exit_on_failure=False)
test(ret == 0, "sbt application (%s)" % app)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,6 @@ public static void main(String[] args) {
System.exit(-1);
}
System.out.println("Test succeeded");
sc.stop();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package main.scala

import scala.util.Try

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

Expand All @@ -30,7 +31,6 @@ object SimpleApp {
}
val logFile = "input.txt"
val sc = new SparkContext(conf)
SparkContext.jarOfClass(this.getClass).foreach(sc.addJar)
val logData = sc.textFile(logFile, 2).cache()
val numAs = logData.filter(line => line.contains("a")).count()
val numBs = logData.filter(line => line.contains("b")).count()
Expand Down
29 changes: 29 additions & 0 deletions dev/audit-release/sbt_app_hive/build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// sbt build for the release-audit application that links against the published
// `spark-hive` artifact. The Scala version, Spark version and release repository
// are all supplied through environment variables.

name := "Simple Project"

version := "1.0"

// Fail fast with a readable message when a required variable is missing;
// System.getenv.get would hand sbt a null and fail later with an obscure error.
scalaVersion := sys.env.getOrElse("SCALA_VERSION", sys.error("SCALA_VERSION must be set"))

libraryDependencies += "org.apache.spark" %% "spark-hive" % sys.env.getOrElse(
  "SPARK_VERSION", sys.error("SPARK_VERSION must be set"))

resolvers ++= Seq(
  "Spark Release Repository" at sys.env.getOrElse(
    "SPARK_RELEASE_REPOSITORY", sys.error("SPARK_RELEASE_REPOSITORY must be set")),
  "Akka Repository" at "http://repo.akka.io/releases/",
  "Spray Repository" at "http://repo.spray.cc/")
9 changes: 9 additions & 0 deletions dev/audit-release/sbt_app_hive/data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
0val_0
1val_1
2val_2
3val_3
4val_4
5val_5
6val_6
7val_7
9val_9
213 changes: 213 additions & 0 deletions dev/audit-release/sbt_app_hive/src/main/resources/hive-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<configuration>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
<!-- resource). -->

<!-- Hive Execution Parameters -->

<property name="build.dir" value="build" />

<property>
<name>build.dir</name>
<value>${user.dir}/build</value>
</property>

<property>
<name>build.dir.hive</name>
<value>${build.dir}/hive</value>
</property>

<property>
<name>hadoop.tmp.dir</name>
<value>${build.dir.hive}/test/hadoop-${user.name}</value>
<description>A base for other temporary directories.</description>
</property>

<!--
<property>
<name>hive.exec.reducers.max</name>
<value>1</value>
<description>maximum number of reducers</description>
</property>
-->

<property>
<name>hive.exec.scratchdir</name>
<value>${build.dir}/scratchdir</value>
<description>Scratch space for Hive jobs</description>
</property>

<property>
<name>hive.exec.local.scratchdir</name>
<value>${build.dir}/localscratchdir/</value>
<description>Local scratch space for Hive jobs</description>
</property>

<property>
<name>javax.jdo.option.ConnectionURL</name>
<!-- note: variable substitution does not work here because this value is loaded by JDO, not Hive -->
<value>jdbc:derby:;databaseName=../build/test/junit_metastore_db;create=true</value>
</property>

<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.apache.derby.jdbc.EmbeddedDriver</value>
</property>

<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>APP</value>
</property>

<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>mine</value>
</property>

<property>
<!-- this should eventually be deprecated since the metastore should supply this -->
<name>hive.metastore.warehouse.dir</name>
<value>${test.warehouse.dir}</value>
<description></description>
</property>

<property>
<name>hive.metastore.metadb.dir</name>
<value>${build.dir}/test/data/metadb/</value>
<description>
Required by metastore server or if the uris argument below is not supplied
</description>
</property>

<property>
<name>test.log.dir</name>
<value>${build.dir}/test/logs</value>
<description></description>
</property>

<property>
<name>test.src.dir</name>
<value>${build.dir}/src/test</value>
<description></description>
</property>

<!--
<property>
<name>test.data.files</name>
<value>${user.dir}/../data/files</value>
<description></description>
</property>
<property>
<name>test.query.file1</name>
<value>file://${user.dir}/../ql/src/test/org/apache/hadoop/hive/ql/input2.q</value>
<value></value>
<description></description>
</property>
-->

<property>
<name>hive.jar.path</name>
<value>${build.dir.hive}/ql/hive-exec-${version}.jar</value>
<description></description>
</property>

<property>
<name>hive.metastore.rawstore.impl</name>
<value>org.apache.hadoop.hive.metastore.ObjectStore</value>
<description>Name of the class that implements the org.apache.hadoop.hive.metastore.rawstore interface. This class is used for storage and retrieval of raw metadata objects such as tables and databases.</description>
</property>

<property>
<name>hive.querylog.location</name>
<value>${build.dir}/tmp</value>
<description>Location of the structured hive logs</description>
</property>

<!--
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
<description>Pre Execute Hook for Tests</description>
</property>
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
<description>Post Execute Hook for Tests</description>
</property>
-->

<property>
<name>hive.task.progress</name>
<value>false</value>
<description>Track progress of a task</description>
</property>

<property>
<name>hive.support.concurrency</name>
<value>false</value>
<description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description>
</property>

<property>
<name>fs.pfile.impl</name>
<value>org.apache.hadoop.fs.ProxyLocalFileSystem</value>
<description>A proxy for local file system used for cross file system testing</description>
</property>

<property>
<name>hive.exec.mode.local.auto</name>
<value>false</value>
<description>
Let hive determine whether to run in local mode automatically
Disabling this for tests so that minimr is not affected
</description>
</property>

<property>
<name>hive.auto.convert.join</name>
<value>false</value>
<description>Whether Hive enable the optimization about converting common join into mapjoin based on the input file size</description>
</property>

<property>
<name>hive.ignore.mapjoin.hint</name>
<value>false</value>
<description>Whether Hive ignores the mapjoin hint</description>
</property>

<property>
<name>hive.input.format</name>
<value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
<description>The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat. </description>
</property>

<property>
<name>hive.default.rcfile.serde</name>
<value>org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe</value>
<description>The default SerDe hive will use for the rcfile format</description>
</property>

</configuration>
57 changes: 57 additions & 0 deletions dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package main.scala

import scala.collection.mutable.{ListBuffer, Queue}

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.hive.LocalHiveContext

/**
 * Simple record type holding a person's name and age.
 *
 * @param name the person's name
 * @param age the person's age
 */
case class Person(
    name: String,
    age: Int)

/**
 * Release-audit smoke test for the `spark-hive` artifact.
 *
 * Recreates a Hive table named `src`, loads the local file `data.txt` into it,
 * selects the rows with `0 <= key < 5` and checks that exactly five rows come
 * back. Prints "Test succeeded" when the check passes; otherwise prints a
 * diagnostic and calls `System.exit(-1)`.
 */
object SparkSqlExample {

  /**
   * Application entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // A master URL is only set explicitly when SPARK_AUDIT_MASTER is present in
    // the environment; otherwise the configuration carries just the app name.
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Sql App")
    }
    val sc = new SparkContext(conf)
    val hiveContext = new LocalHiveContext(sc)

    // Prints `failureMsg` and terminates the JVM with status -1 when `f` is false.
    def test(f: => Boolean, failureMsg: => String): Unit = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    import hiveContext._
    hql("DROP TABLE IF EXISTS src")
    hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
    hql("LOAD DATA LOCAL INPATH 'data.txt' INTO TABLE src")
    val results = hql("FROM src SELECT key, value WHERE key >= 0 AND KEY < 5").collect()
    results.foreach(println)

    // `results` is an Array, so plain string concatenation would print only its
    // JVM identity; report the row count and the rows themselves instead.
    test(
      results.size == 5,
      "Unexpected number of selected elements: " + results.size +
        " (" + results.mkString(", ") + ")")
    println("Test succeeded")
    sc.stop()
  }
}
29 changes: 29 additions & 0 deletions dev/audit-release/sbt_app_sql/build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// sbt build for the release-audit application that links against the published
// `spark-sql` artifact. The Scala version, Spark version and release repository
// are all supplied through environment variables.

name := "Simple Project"

version := "1.0"

// Fail fast with a readable message when a required variable is missing;
// System.getenv.get would hand sbt a null and fail later with an obscure error.
scalaVersion := sys.env.getOrElse("SCALA_VERSION", sys.error("SCALA_VERSION must be set"))

libraryDependencies += "org.apache.spark" %% "spark-sql" % sys.env.getOrElse(
  "SPARK_VERSION", sys.error("SPARK_VERSION must be set"))

resolvers ++= Seq(
  "Spark Release Repository" at sys.env.getOrElse(
    "SPARK_RELEASE_REPOSITORY", sys.error("SPARK_RELEASE_REPOSITORY must be set")),
  "Akka Repository" at "http://repo.akka.io/releases/",
  "Spray Repository" at "http://repo.spray.cc/")
Loading

0 comments on commit b2bdd0e

Please sign in to comment.