Skip to content

Latest commit

 

History

History
130 lines (114 loc) · 2.25 KB

field_collapse.md

File metadata and controls

130 lines (114 loc) · 2.25 KB

field collapse

description

Deduplicate documents at search time by collapsing on a shared key field and keeping only the most recent version (by `@timestamp`), rather than updating existing documents in place.

################################### clean up ###################################

# Remove everything a previous run of this script created (both indices and the
# ingest pipeline) so the example can be replayed from a clean state.
# NOTE(review): on a first run these resources do not exist yet, so "not found"
# errors from these requests are presumably expected and can be ignored — confirm.
DELETE /index-000001
DELETE /index-000002
DELETE /_ingest/pipeline/ingest_timestamp

################################################################################

# Create an ingest pipeline named `ingest_timestamp`.
# It has a single `set` processor that writes the ingest-time timestamp
# (`_ingest.timestamp`) into the `@timestamp` field of every document that is
# indexed through it. That field is what the search later sorts on.

PUT /_ingest/pipeline/ingest_timestamp
{
  "processors": [
    {
      "set": {
        "field": "@timestamp",
        "value": "{{_ingest.timestamp}}"
      }
    }
  ]
}

# Create two indices, `index-000001` and `index-000002`, with identical
# aliases, settings and mappings.
#
# This request creates `index-000001`:
#   - alias `index`: both indices share it, so they can be addressed by one name
#   - index sort: `@timestamp`, descending (newest first)
#   - mappings: `@timestamp` (date), `field` and `search_field` (text),
#     `collapse_field` (long) which is the key used to group duplicate documents

PUT /index-000001
{
  "aliases": {
    "index": {}
  }, 
  "settings": {
    "index": {
      "sort.field": "@timestamp",
      "sort.order": "desc"
    }
  },
  "mappings": {
    "properties": {
      "@timestamp": {
        "type": "date"
      },
      "field": {
        "type": "text"
      },
      "search_field": {
        "type": "text"
      },
      "collapse_field": {
        "type": "long"
      }
    }
  }
}

# Create `index-000002` with the same alias (`index`), index sort
# (`@timestamp` descending) and field mappings as `index-000001`.
PUT /index-000002
{
  "aliases": {
    "index": {}
  }, 
  "settings": {
    "index": {
      "sort.field": "@timestamp",
      "sort.order": "desc"
    }
  },
  "mappings": {
    "properties": {
      "@timestamp": {
        "type": "date"
      },
      "field": {
        "type": "text"
      },
      "search_field": {
        "type": "text"
      },
      "collapse_field": {
        "type": "long"
      }
    }
  }
}

# Index the original document into `index-000001`.
#   - `pipeline=ingest_timestamp` stamps `@timestamp` at ingest time
#   - `refresh=wait_for` makes the request return only once the document is
#     visible to search, so the search at the end of this script cannot miss it
#     even when all requests are run back to back

POST /index-000001/_doc?pipeline=ingest_timestamp&refresh=wait_for
{
  "collapse_field": 1234,
  "field": "value",
  "search_field": "my awesome stuff"
}

# Wait a little while (so this document gets a later `@timestamp`), then index
# the updated version of the document into `index-000002`. It carries the same
# `collapse_field` value as the original, which marks it as the same logical
# document.
#   - `pipeline=ingest_timestamp` stamps `@timestamp` at ingest time
#   - `refresh=wait_for` makes the request return only once the document is
#     visible to search, so the search that follows is guaranteed to see it

POST /index-000002/_doc?pipeline=ingest_timestamp&refresh=wait_for
{
  "collapse_field": 1234,
  "field": "updated_value",
  "search_field": "my awesome stuff"
}

# Test search: only the updated version of the document should be returned.
#   - `collapse` keeps a single hit per distinct `collapse_field` value
#   - sorting on `@timestamp` descending makes that hit the most recent one
# The search goes through the shared alias `index` rather than the `index*`
# wildcard, so unrelated indices whose names merely start with "index" (and
# which may not map `@timestamp` or `collapse_field`) are never included.

GET /index/_search
{
  "query": {
    "match": {
      "search_field": "my awesome stuff"
    }
  },
  "collapse": {
    "field": "collapse_field"
  },
  "sort": [
    {
      "@timestamp": {
        "order": "desc"
      }
    }
  ],
  "from": 0
}