// Assist user in selecting a field on which to filter
/*
This notebook demonstrates how to help a user select a field and value for filtering based on simple text input.
For example, on the dataset https://opendata.dc.gov/datasets/crime-incidents-in-2019, when a user searches "burglary", they are given a suggestion to filter by the value BURGLARY in the field offense.
To play with a different dataset, change the datasetToSearch value in this cell, or change the first value passed to the assist function in the third cell.
*/
const fetch = require('node-fetch')
const _ = require('lodash')
const {fuzzy} = require("fast-fuzzy")
// Free-text query used to look up a dataset; only the first search hit is used below.
const datasetToSearch = 'common ownership lots'
// encodeURIComponent keeps spaces and reserved characters (&, #, ...) in the query
// from corrupting the rest of the URL.
const datasetSearchUrl = `https://hub.arcgis.com/api/v3/datasets?q=${encodeURIComponent(datasetToSearch)}&fields[datasets]=name,source,statistics&page[size]=1`
const datasetResponse = await fetch(datasetSearchUrl)
// Fail with a readable message instead of trying to parse an error page as a result set.
if (!datasetResponse.ok) {
  throw new Error(`Dataset search failed: ${datasetResponse.status} ${datasetResponse.statusText}`)
}
const datasets = (await datasetResponse.json()).data
// Without this guard an empty result surfaces later as "cannot read properties of undefined".
if (!Array.isArray(datasets) || datasets.length === 0) {
  throw new Error(`No dataset found for "${datasetToSearch}"`)
}
const dataset = datasets[0]
// Per-field statistics, grouped by field type; everything searchable is derived from this.
const statistics = dataset.attributes.statistics
/**
 * Build the searchable record describing one dataset field.
 *
 * @param {string} fieldName - Name of the field; also stored lower-cased as the match key.
 * @param {object} field - Field entry from the dataset statistics; its `statistics` property is read when present.
 * @param {string} fieldType - Key of the statistics group the field was listed under.
 * @returns {object} `{type: 'field', fieldType, value, lowerValue}` merged with the field's summary statistics.
 */
function formatOneField (fieldName, field, fieldType) {
  const statistics = (field && field.statistics) || {}
  const { values, ...otherStats } = statistics
  // When `values` carries a `max`, it is itself the summary and is spread as-is;
  // otherwise `values` is dropped and the remaining statistics are kept.
  // An explicit null check (rather than truthiness) keeps summaries whose max is 0.
  const hasSummaryValues = values != null && values.max != null
  const stats = hasSummaryValues ? values : otherStats
  return {
    type: 'field',
    fieldType,
    value: fieldName,
    lowerValue: fieldName.toLowerCase(),
    ...stats
  }
}
// One searchable record per field, walking each statistics group (keyed by field type) in turn.
const fields = []
for (const [fieldType, fieldsOfType] of Object.entries(statistics)) {
  for (const [fieldName, field] of Object.entries(fieldsOfType)) {
    fields.push(formatOneField(fieldName, field, fieldType))
  }
}
// Preview the first field record.
fields[0]
/**
 * Expand one field's list of observed values into searchable "field value" records.
 *
 * @param {string} fieldName - Name of the field the values belong to.
 * @param {object} field - Field entry; `field.statistics.values` is read as a list of
 *   `{value, count}` entries and `field.statistics.count` as the total used for `proportion`.
 * @returns {Array<object>} One `{type: 'fieldValue', fieldName, value, lowerValue, count, proportion}`
 *   record per entry that has a value; an empty array when the field has no list of values.
 */
function formatOneStat (fieldName, field) {
  const statistics = field && field.statistics
  const values = statistics && statistics.values
  // Fields without a list of values contribute nothing.
  if (!Array.isArray(values)) return []
  return values
    // Entries with no value (null/undefined) cannot be matched against text and would
    // otherwise crash on toLowerCase().
    .filter(entry => entry != null && entry.value != null)
    .map(({ value, count }) => ({
      type: 'fieldValue',
      fieldName,
      value,
      // String() lets non-string values (e.g. numbers) be lower-cased as well.
      lowerValue: String(value).toLowerCase(),
      count,
      proportion: count / statistics.count
    }))
}
// Searchable records for every listed value of every string field.
// The `|| {}` guards against a missing `string` statistics group, which would
// otherwise make Object.entries throw.
const strings = _.flatMap(Object.entries(statistics.string || {}), ([fieldName, field]) => {
  return formatOneStat(fieldName, field)
})
// Preview the first field-value record.
strings[0]
// Combined search corpus: field records first, then field-value records.
const searchData = [...fields, ...strings]
// Sanity check: total, field and field-value record counts.
console.log(`${searchData.length}, ${fields.length}, ${strings.length}`)
/**
 * Suggest filters or summaries matching free-text input.
 *
 * Every record in `searchData` is scored against the lower-cased input with `fuzzy`;
 * the best-scoring records are returned as formatted suggestions, highest score first.
 *
 * @param {string} input - Text typed by the user.
 * @param {object} [options]
 * @param {number} [options.limit=10] - Maximum number of top-scoring records considered.
 * @param {number} [options.minScore=0.7] - A suggestion is kept only if its score is strictly above this.
 * @returns {Array<{message: string, score: number}>} Suggestions produced by `formatResult`.
 */
function assist (input, { limit = 10, minScore = 0.7 } = {}) {
  const lowerInput = input.toLowerCase()
  // Scores are kept next to each record instead of being written onto it, so a search
  // never modifies the shared searchData entries.
  const scored = searchData.map(entry => ({ entry, score: fuzzy(lowerInput, entry.lowerValue) }))
  return _.sortBy(scored, ({ score }) => -score)
    .slice(0, limit)
    .filter(({ score }) => score > minScore)
    // Only the surviving hits are copied, with their score attached for formatResult.
    .map(({ entry, score }) => formatResult({ ...entry, score }))
}
/**
 * Turn a scored search hit into a user-facing suggestion.
 *
 * @param {object} hit - Scored record; `type` selects the wording, `score` is passed through.
 * @returns {{message: string, score: number}} A filter suggestion for 'fieldValue' hits,
 *   a summarize suggestion for every other hit.
 */
function formatResult (hit) {
  const { score } = hit
  const message = hit.type === 'fieldValue'
    ? `Filter by: ${hit.value} in field: ${hit.fieldName}?`
    : `Summarize by ${hit.fieldType}: ${hit.value}?`
  return { message, score }
}
// Run a sample search. Change the string passed to assist() to search for a different term.
assist('equity residential')
// no comments