-
Notifications
You must be signed in to change notification settings - Fork 0
/
build.sbt
160 lines (145 loc) · 6.77 KB
/
build.sbt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
// ---------------------------------------------------------------------------
// Project identity
// ---------------------------------------------------------------------------
name         := "vlm-performance"
organization := "com.azavea"
version      := "0.1.0-SNAPSHOT"

// Default Scala version, plus the full list of versions to cross-build against.
scalaVersion       := "2.11.12"
crossScalaVersions := Seq("2.12.8", "2.11.12")

// Compiler flags appended to whatever sbt already passes to scalac.
scalacOptions ++= Seq(
  // Warning verbosity.
  "-deprecation",
  "-unchecked",
  // Language features enabled build-wide so sources need no per-file imports.
  "-language:implicitConversions",
  "-language:reflectiveCalls",
  "-language:higherKinds",
  "-language:postfixOps",
  "-language:existentials",
  // Report uses of features that would otherwise require an explicit import.
  "-feature"
)

// Source-file license header: ALv2, year "2019", copyright owner "Azavea".
headerLicense := Some(HeaderLicense.ALv2("2019", "Azavea"))
// Additional repositories consulted during dependency resolution.
// The overall order (Bintray first, then LocationTech) is preserved.

// NOTE(review): JFrog has sunset Bintray, so these two repositories may no
// longer resolve — confirm, and point at the artifacts' current home if needed.
resolvers ++= Seq(
  Resolver.bintrayRepo("azavea", "maven"),
  Resolver.bintrayRepo("azavea", "geotrellis")
)

// LocationTech release and snapshot repositories.
resolvers ++= Seq(
  "locationtech-releases" at "https://repo.locationtech.org/content/groups/releases",
  "locationtech-snapshots" at "https://repo.locationtech.org/content/groups/snapshots"
)
// Run the application/tests in a separate JVM and send that process's output
// straight to standard output.
fork := true
outputStrategy := Some(StdoutOutput)

// kind-projector is published per Scala *binary* version; `%%` selects that
// cross-version, which is what `cross CrossVersion.binary` spells out explicitly.
addCompilerPlugin("org.typelevel" %% "kind-projector" % "0.10.0")

// Macro paradise is published per *full* Scala version; the explicit
// `cross CrossVersion.full` is what determines the artifact suffix.
addCompilerPlugin("org.scalamacros" % "paradise" % "2.1.1" cross CrossVersion.full)
// Library dependencies. Versions shared by several artifacts are named once,
// locally, so the paired modules cannot drift apart.
libraryDependencies ++= {
  val contribVersion = "3.14.0-SNAPSHOT"
  val sparkVersion   = "2.4.2"

  Seq(
    "com.azavea.geotrellis" %% "geotrellis-contrib-vlm"  % contribVersion,
    "com.azavea.geotrellis" %% "geotrellis-contrib-gdal" % contribVersion,
    "org.apache.spark"      %% "spark-core"              % sparkVersion,
    "org.apache.spark"      %% "spark-sql"               % sparkVersion,
    "org.scalatest"         %% "scalatest"               % "3.0.7" % Test
  )
}

// Pin the GDAL warp bindings to this exact version regardless of what the
// dependencies above request transitively.
dependencyOverrides += "com.azavea.gdal" % "gdal-warp-bindings" % "33.5523882"
// Do not run the test suite as part of building the assembly jar.
test in assembly := {}

// Relocate every shapeless class under com.azavea.shaded.shapeless in all jars
// that go into the assembly.
assemblyShadeRules in assembly := Seq(
  ShadeRule.rename("shapeless.**" -> "com.azavea.shaded.shapeless.@1").inAll
)

// How to resolve files that appear in more than one jar when assembling.
assemblyMergeStrategy in assembly := {
  // Configuration files are concatenated.
  case "reference.conf" | "application.conf" => MergeStrategy.concat

  case PathList("META-INF", rest @ _*) =>
    rest match {
      case "MANIFEST.MF" :: Nil => MergeStrategy.discard
      // Concatenate everything in the services directory to keep GeoTools happy.
      case "services" :: _ :: Nil => MergeStrategy.concat
      // Concatenate these to keep JAI happy.
      case ("javax.media.jai.registryFile.jai" | "registryFile.jai" | "registryFile.jaiext") :: Nil =>
        MergeStrategy.concat
      // Must exclude META-INF/*.([RD]SA|SF) to avoid the
      // "Invalid signature file digest for Manifest main attributes" exception.
      case name :: Nil if Seq(".RSA", ".DSA", ".SF").exists(suffix => name.endsWith(suffix)) =>
        MergeStrategy.discard
      // Any other META-INF entry: keep the first occurrence.
      case _ => MergeStrategy.first
    }

  // Everything outside META-INF: keep the first occurrence.
  case _ => MergeStrategy.first
}
// Settings from sbt-lighter plugin that will automate creating and submitting this job to EMR
import sbtlighter._
// Switch off the plugin's project-level defaults; its settings are re-added
// per configuration below via `LighterPlugin.baseSettings`.
LighterPlugin.disable

// sbt configurations that each carry their own copy of the EMR settings.
lazy val Ingest = config("ingest")
lazy val IngestRasterSourceGDAL = config("ingestRasterSourceGDAL")

// EMR cluster definition shared by every configuration above.
lazy val EMRSettings = {
  // JVM flags applied identically to the Spark driver and to the executors.
  val sparkJvmOptions =
    "-XX:+UseParallelGC -XX:+UseParallelOldGC -XX:OnOutOfMemoryError='kill -9 %p' -Dgeotrellis.s3.threads.rdd.write=64"

  LighterPlugin.baseSettings ++ Seq(
    // --- Cluster identity and location ---
    sparkClusterName     := s"GeoTrellis VLM Performance ${sys.env.getOrElse("USER", "<anonymous user>")}",
    sparkEmrRelease      := "emr-5.23.0",
    sparkAwsRegion       := "us-east-1",
    sparkEmrApplications := Seq("Hadoop", "Spark", "Ganglia", "Zeppelin"),

    // --- Instances ---
    // NOTE(review): presumably 1 master + 20 core nodes — confirm.
    sparkInstanceCount := 21,
    sparkMasterType    := "i3.xlarge",
    sparkCoreType      := "i3.xlarge",
    sparkMasterPrice   := Some(0.2),
    sparkCorePrice     := Some(0.2),
    sparkMasterEbsSize := None, // Some(64)
    sparkCoreEbsSize   := None, // Some(64)

    // --- IAM roles and SSH key ---
    sparkEmrServiceRole := "EMR_DefaultRole",
    sparkInstanceRole   := "EMR_EC2_DefaultRole",
    sparkJobFlowInstancesConfig := sparkJobFlowInstancesConfig.value.withEc2KeyName("geotrellis-emr"),

    // --- S3 locations ---
    sparkS3JarFolder := "s3://geotrellis-test/rastersource-performance/jars",
    sparkS3LogUri    := Some("s3://geotrellis-test/rastersource-performance/logs"),

    // --- Bootstrap ---
    sparkEmrBootstrap := List(
      BootstrapAction(
        "Install GDAL dependencies",
        "s3://geotrellis-test/usbuildings/bootstrap.sh",
        "s3://geotrellis-test/usbuildings", "v1.0"
      )
    ),

    // --- EMR configuration classifications ---
    sparkEmrConfigs := List(
      EmrConfig("spark").withProperties(
        "maximizeResourceAllocation" -> "false" // be careful with setting this param to true
      ),
      EmrConfig("spark-defaults").withProperties(
        "spark.driver.maxResultSize" -> "4200M",
        "spark.dynamicAllocation.enabled" -> "true",
        "spark.shuffle.service.enabled" -> "true",
        "spark.shuffle.compress" -> "true",
        "spark.shuffle.spill.compress" -> "true",
        "spark.rdd.compress" -> "true",
        "spark.driver.extraJavaOptions" -> sparkJvmOptions,
        "spark.executor.extraJavaOptions" -> sparkJvmOptions
      ),
      EmrConfig("spark-env").withProperties(
        "LD_LIBRARY_PATH" -> "/usr/local/lib"
      ),
      EmrConfig("yarn-site").withProperties(
        "yarn.resourcemanager.am.max-attempts" -> "1",
        "yarn.nodemanager.vmem-check-enabled" -> "false",
        "yarn.nodemanager.pmem-check-enabled" -> "false"
      )
    )
  )
}
// Command aliases that run in the `ingest` configuration: one to create the
// cluster, and one per dataset to submit the Ingest main class.
addCommandAlias("create-cluster", "ingest:sparkCreateCluster")
addCommandAlias("ingest-ned", "ingest:sparkSubmitMain geotrellis.contrib.performance.Ingest ned")
addCommandAlias("ingest-nlcd", "ingest:sparkSubmitMain geotrellis.contrib.performance.Ingest nlcd")

// `ingest` configuration: the shared EMR settings plus the Spark properties
// used for job submission.
inConfig(Ingest)(EMRSettings ++ Seq(
  sparkSubmitConfs := Map(
    "spark.master" -> "yarn",
    "spark.driver.memory" -> "4200M",
    "spark.driver.cores" -> "2",
    "spark.executor.memory" -> "1500M",
    "spark.executor.cores" -> "1",
    /*"spark.dynamicAllocation.enabled" -> "false",
    "spark.executor.instances" -> "200",*/
    // The `spark.yarn.*.memoryOverhead` keys are deprecated since Spark 2.3;
    // these are the replacement keys, with the same values.
    "spark.executor.memoryOverhead" -> "700",
    "spark.driver.memoryOverhead" -> "700"/*,
    "spark.dynamicAllocation.minExecutors" -> "200",
    "spark.dynamicAllocation.maxExecutors" -> "200"*/
  )
))
// Command aliases that submit the IngestRasterSource main class. The GeoTIFF
// variants run in the `ingest` configuration; the GDAL variants run in
// `ingestRasterSourceGDAL`, defined just below.
addCommandAlias("ingest-raster-source-ned-geotiff", "ingest:sparkSubmitMain geotrellis.contrib.performance.IngestRasterSource ned geotiff")
addCommandAlias("ingest-raster-source-nlcd-geotiff", "ingest:sparkSubmitMain geotrellis.contrib.performance.IngestRasterSource nlcd geotiff")
addCommandAlias("ingest-raster-source-ned-gdal", "ingestRasterSourceGDAL:sparkSubmitMain geotrellis.contrib.performance.IngestRasterSource ned gdal")
addCommandAlias("ingest-raster-source-nlcd-gdal", "ingestRasterSourceGDAL:sparkSubmitMain geotrellis.contrib.performance.IngestRasterSource nlcd gdal")

// `ingestRasterSourceGDAL` configuration: the shared EMR settings plus Spark
// properties with a fixed executor count (dynamic allocation turned off).
inConfig(IngestRasterSourceGDAL)(EMRSettings ++ Seq(
  sparkSubmitConfs := Map(
    "spark.master" -> "yarn",
    "spark.driver.memory" -> "4200M",
    "spark.driver.cores" -> "2",
    "spark.executor.memory" -> "4500M",
    "spark.executor.cores" -> "1",
    "spark.dynamicAllocation.enabled" -> "false",
    "spark.executor.instances" -> "250", // 70 for 20 nodes cluster
    // The `spark.yarn.*.memoryOverhead` keys are deprecated since Spark 2.3;
    // these are the replacement keys, with the same values.
    "spark.executor.memoryOverhead" -> "700",
    "spark.driver.memoryOverhead" -> "700"/*,
    "spark.dynamicAllocation.minExecutors" -> "200",
    "spark.dynamicAllocation.maxExecutors" -> "200"*/
  )
))