Skip to content

Commit

Permalink
Outputting Raw Feature Filter information: Part 2 (#258)
Browse files Browse the repository at this point in the history
* added Raw Feature Filter config and metrics to ModelInsights; fixed serialization; added unit test for reader on old workflow
  • Loading branch information
clin-projects authored Apr 10, 2019
1 parent 7bcd1e4 commit 7d6ec61
Show file tree
Hide file tree
Showing 10 changed files with 639 additions and 52 deletions.
12 changes: 8 additions & 4 deletions core/src/main/scala/com/salesforce/op/ModelInsights.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ package com.salesforce.op
import com.salesforce.op.evaluators._
import com.salesforce.op.features._
import com.salesforce.op.features.types._
import com.salesforce.op.filters.{ExclusionReasons, FeatureDistribution, RawFeatureFilterResults}
import com.salesforce.op.filters._
import com.salesforce.op.stages._
import com.salesforce.op.stages.impl.feature.TransmogrifierDefaults
import com.salesforce.op.stages.impl.preparators._
Expand Down Expand Up @@ -322,13 +322,13 @@ case class Continuous(min: Double, max: Double, mean: Double, variance: Double)
*/
case class Discrete(domain: Seq[String], prob: Seq[Double]) extends LabelInfo


/**
* Summary of feature insights for all features derived from a given input (raw) feature
*
* @param featureName name of raw feature insights are about
* @param featureType type of raw feature insights are about
* @param derivedFeatures sequence containing insights for each feature derived from the raw feature
* @param metrics sequence containing metrics computed in RawFeatureFilter
* @param distributions distribution information for the raw feature (if calculated in RawFeatureFilter)
* @param exclusionReasons exclusion reasons for the raw feature (if calculated in RawFeatureFilter)
*
Expand All @@ -338,6 +338,7 @@ case class FeatureInsights
featureName: String,
featureType: String,
derivedFeatures: Seq[Insights],
metrics: Seq[RawFeatureFilterMetrics] = Seq.empty,
distributions: Seq[FeatureDistribution] = Seq.empty,
exclusionReasons: Seq[ExclusionReasons] = Seq.empty
)
Expand Down Expand Up @@ -482,13 +483,15 @@ case object ModelInsights {
s"Found ${vectorInput.map(_.name + " as feature vector").getOrElse("no feature vector")}" +
s" to fill in model insights"
)

ModelInsights(
label = getLabelSummary(label, checkerSummary),
features = getFeatureInsights(vectorInput, checkerSummary, model, rawFeatures,
blacklistedFeatures, blacklistedMapKeys, rawFeatureFilterResults),
selectedModelInfo = getModelInfo(model),
trainingParams = trainingParams,
stageInfo = getStageInfo(stages)
stageInfo = RawFeatureFilterConfig.toStageInfo(rawFeatureFilterResults.rawFeatureFilterConfig)
++ getStageInfo(stages)
)
}

Expand Down Expand Up @@ -622,10 +625,11 @@ case object ModelInsights {
val ftype = allFeatures.find(_.name == fname)
.map(_.typeName)
.getOrElse("")
val metrics = rawFeatureFilterResults.rawFeatureFilterMetrics.filter(_.name == fname)
val distributions = rawFeatureFilterResults.rawFeatureDistributions.filter(_.name == fname)
val exclusionReasons = rawFeatureFilterResults.exclusionReasons.filter(_.name == fname)
FeatureInsights(featureName = fname, featureType = ftype, derivedFeatures = seq.map(_._2),
distributions = distributions, exclusionReasons = exclusionReasons)
metrics = metrics, distributions = distributions, exclusionReasons = exclusionReasons)
}.toSeq
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -544,8 +544,8 @@ class RawFeatureFilter[T]
maxJSDivergence = maxJSDivergence,
maxCorrelation = maxCorrelation,
correlationType = correlationType,
jsDivergenceProtectedFeatures = jsDivergenceProtectedFeatures,
protectedFeatures = protectedFeatures
jsDivergenceProtectedFeatures = jsDivergenceProtectedFeatures.toSeq,
protectedFeatures = protectedFeatures.toSeq
)

val featureDistributions =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,22 @@

package com.salesforce.op.filters

import com.salesforce.op.features.FeatureDistributionType
import com.salesforce.op.stages.impl.preparators.CorrelationType
import com.salesforce.op.utils.json.{EnumEntrySerializer, SpecialDoubleSerializer}
import org.json4s.jackson.JsonMethods._
import org.json4s.jackson.Serialization
import org.json4s.{DefaultFormats, Formats}

import scala.util.Try

/**
 * Shared json4s formats for the RawFeatureFilter result/config companions.
 *
 * Starts from [[DefaultFormats]] and registers a [[SpecialDoubleSerializer]] plus enum-entry
 * serializers for [[CorrelationType]] and [[FeatureDistributionType]].
 */
trait RawFeatureFilterFormats {
  implicit val jsonFormats: Formats = {
    val correlationTypeSerializer =
      EnumEntrySerializer.json4s[CorrelationType](CorrelationType)
    val distributionTypeSerializer =
      EnumEntrySerializer.json4s[FeatureDistributionType](FeatureDistributionType)

    DefaultFormats + new SpecialDoubleSerializer + correlationTypeSerializer + distributionTypeSerializer
  }
}

/**
* Contains configuration and results from RawFeatureFilter
*
Expand All @@ -52,10 +62,7 @@ case class RawFeatureFilterResults
exclusionReasons: Seq[ExclusionReasons] = Seq.empty
)

object RawFeatureFilterResults {

implicit val jsonFormats: Formats = DefaultFormats

object RawFeatureFilterResults extends RawFeatureFilterFormats {
/**
* RawFeatureFilterResults to json
*
Expand Down Expand Up @@ -86,10 +93,37 @@ case class RawFeatureFilterConfig
maxJSDivergence: Double = 1.0,
maxCorrelation: Double = 1.0,
correlationType: CorrelationType = CorrelationType.Pearson,
jsDivergenceProtectedFeatures: Set[String] = Set.empty,
protectedFeatures: Set[String] = Set.empty
jsDivergenceProtectedFeatures: Seq[String] = Seq.empty,
protectedFeatures: Seq[String] = Seq.empty
)

object RawFeatureFilterConfig extends RawFeatureFilterFormats {

  /**
   * Flattens a [[RawFeatureFilterConfig]] into a map of field name to string value.
   *
   * The config is written to JSON with the implicit formats in scope, parsed back, extracted as
   * a `Map[String, Any]`, and every value is then rendered with `toString`.
   *
   * @param config configuration to convert
   * @return map from field name to the `toString` rendering of its extracted JSON value
   */
  def toStringMap(config: RawFeatureFilterConfig): Map[String, String] = {
    val json = Serialization.write[RawFeatureFilterConfig](config)
    val fields = parse(json).extract[Map[String, Any]]
    for ((name, value) <- fields) yield name -> value.toString
  }

  /**
   * Wraps the configuration in the same nested-map shape used for stage info, so that it can be
   * reported alongside the stage info in ModelInsights.
   *
   * @param config configuration to summarize
   * @return single-entry map keyed by "rawFeatureFilter" whose value holds the "uid" and the
   *         stringified "params" of the config
   */
  def toStageInfo(config: RawFeatureFilterConfig): Map[String, Map[String, Any]] = {
    val rffStageName = "rawFeatureFilter"
    val rffUid = "rawFeatureFilter"
    val details: Map[String, Any] = Map(
      "uid" -> rffUid,
      "params" -> toStringMap(config)
    )
    Map(rffStageName -> details)
  }

}

/**
* Contains raw feature metrics computed in Raw Feature Filter
*
Expand Down
Empty file.
Loading

0 comments on commit 7d6ec61

Please sign in to comment.