summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/jetpack/modules/search/class.jetpack-search.php')
-rw-r--r--plugins/jetpack/modules/search/class.jetpack-search.php1313
1 files changed, 1313 insertions, 0 deletions
diff --git a/plugins/jetpack/modules/search/class.jetpack-search.php b/plugins/jetpack/modules/search/class.jetpack-search.php
new file mode 100644
index 00000000..cdd10504
--- /dev/null
+++ b/plugins/jetpack/modules/search/class.jetpack-search.php
@@ -0,0 +1,1313 @@
+<?php
+
+class Jetpack_Search {
+
+ /**
+ * Number of results reported to WordPress for the current search.
+ *
+ * do_search() sets this from the API's reported total, capped at $max_offset plus the page size.
+ */
+ protected $found_posts = 0;
+
+ /**
+ * The maximum offset ('from' param), since deep pages get exponentially slower.
+ *
+ * @see https://www.elastic.co/guide/en/elasticsearch/guide/current/pagination.html
+ */
+ protected $max_offset = 200;
+
+ /**
+ * Value returned by search() for the most recent query: the JSON-decoded API response, or a WP_Error.
+ */
+ protected $search_result;
+
+ // NOTE(review): $original_blog_id is not referenced in the visible part of this class - confirm it is still needed.
+ protected $original_blog_id;
+ // Blog ID read from Jetpack::get_option( 'id' ) in setup(); used to build the search API URL.
+ protected $jetpack_blog_id;
+
+ // Aggregations (filters) configured through set_filters(), keyed by label.
+ protected $aggregations = array();
+ // Upper bound applied to the 'size' of taxonomy and post_type aggregations.
+ protected $max_aggregations_count = 100;
+
+ // Singleton instance, created on the first call to instance().
+ protected static $instance;
+
+ // Languages with custom analyzers. Other languages are supported,
+ // but are analyzed with the default analyzer.
+ public static $analyzed_langs = array( 'ar', 'bg', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'eu', 'fa', 'fi', 'fr', 'he', 'hi', 'hu', 'hy', 'id', 'it', 'ja', 'ko', 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' );
+
+ /**
+  * Protected so the class can only be instantiated from within, via instance().
+  */
+ protected function __construct() {
+     // Intentionally empty: all setup work happens in setup(), which instance() calls.
+ }
+
+ /**
+  * Refuse cloning so that only one instance of the class can exist.
+  */
+ public function __clone() {
+     $message = "Please don't __clone Jetpack_Search";
+
+     wp_die( $message );
+ }
+
+ /**
+  * Refuse unserialization so that only one instance of the class can exist.
+  */
+ public function __wakeup() {
+     $message = "Please don't __wakeup Jetpack_Search";
+
+     wp_die( $message );
+ }
+
+ /**
+  * Return the shared Jetpack_Search object, creating it on first use
+  *
+  * On the first call a new object is stored as the singleton and its setup() method is run.
+  * Later calls hand back the stored object untouched.
+  *
+  * @module search
+  *
+  * @return Jetpack_Search The Jetpack_Search singleton
+  */
+ public static function instance() {
+     if ( isset( self::$instance ) ) {
+         return self::$instance;
+     }
+
+     // Store the object before running setup(), so the singleton is already registered while setup() runs
+     $search         = new Jetpack_Search();
+     self::$instance = $search;
+
+     $search->setup();
+
+     return self::$instance;
+ }
+
+ /**
+  * Check the pre-requisites for search and, when they are met, register the hooks
+  *
+  * Nothing is hooked unless Jetpack is active and a Jetpack blog ID is available.
+  *
+  * @module search
+  */
+ public function setup() {
+     if ( Jetpack::is_active() ) {
+         $this->jetpack_blog_id = Jetpack::get_option( 'id' );
+
+         // The blog ID is part of the search endpoint URL, so there is nothing to do without it
+         if ( $this->jetpack_blog_id ) {
+             $this->init_hooks();
+         }
+     }
+ }
+
+ /**
+  * Register the actions and filters through which this class takes over Search
+  *
+  * The widget registration hook is always added; the query filters are only added outside of wp-admin.
+  *
+  * @module search
+  */
+ public function init_hooks() {
+     add_action( 'widgets_init', array( $this, 'action__widgets_init' ) );
+
+     if ( is_admin() ) {
+         return;
+     }
+
+     add_filter( 'posts_pre_query', array( $this, 'filter__posts_pre_query' ), 10, 2 );
+
+     add_filter( 'jetpack_search_es_wp_query_args', array( $this, 'filter__add_date_filter_to_query' ), 10, 2 );
+ }
+
+ /**
+  * Run a search on the WP.com public API.
+  *
+  * POSTs the JSON-encoded arguments to the /sites/<blog_id>/search endpoint, fires the
+  * 'did_jetpack_search_query' action with timing information, and returns the decoded body.
+  *
+  * @module search
+  *
+  * @param array $es_args Args conforming to the WP.com /sites/<blog_id>/search endpoint
+  *
+  * @return array|WP_Error The JSON-decoded response from the public api (null if the body is not valid JSON),
+  *                        or a WP_Error when the request fails or returns a non-2xx status
+  */
+ public function search( array $es_args ) {
+     $service_url = 'https://public-api.wordpress.com/rest/v1/sites/' . $this->jetpack_blog_id . '/search';
+
+     $start_time = microtime( true );
+
+     $request = wp_remote_post( $service_url, array(
+         'headers'    => array(
+             'Content-Type' => 'application/json',
+         ),
+         'timeout'    => 10,
+         'user-agent' => 'jetpack_search',
+         'body'       => json_encode( $es_args ),
+     ) );
+
+     $end_time = microtime( true );
+
+     if ( is_wp_error( $request ) ) {
+         return $request;
+     }
+
+     $response_code = wp_remote_retrieve_response_code( $request );
+
+     if ( ! $response_code || $response_code < 200 || $response_code >= 300 ) {
+         return new WP_Error( 'invalid_search_api_response', 'Invalid response from API - ' . $response_code );
+     }
+
+     $response = json_decode( wp_remote_retrieve_body( $request ), true );
+
+     // The 'took' key may be missing from the decoded body, so test for it instead of reading it blindly
+     $took = ( is_array( $response ) && ! empty( $response['took'] ) ) ? $response['took'] : null;
+
+     $query = array(
+         'args'          => $es_args,
+         'response'      => $response,
+         'response_code' => $response_code,
+         'elapsed_time'  => ( $end_time - $start_time ) * 1000, // Convert from float seconds to ms
+         'es_time'       => $took,
+         'url'           => $service_url,
+     );
+
+     /**
+      * Fires after a search request has been performed
+      *
+      * Includes the following info in the $query parameter:
+      *
+      * array args Array of Elasticsearch arguments for the search
+      * array response Raw API response, JSON decoded
+      * int response_code HTTP response code of the request
+      * float elapsed_time Roundtrip time of the search request, in milliseconds
+      * float es_time Amount of time Elasticsearch spent running the request, in milliseconds
+      * string url API url that was queried
+      *
+      * @since 5.0
+      *
+      * @param array $query Array of information about the query performed
+      */
+     do_action( 'did_jetpack_search_query', $query );
+
+     return $response;
+ }
+
+ /**
+  * Bypass the normal Search query and offload it to Jetpack servers
+  *
+  * This is the main hook of the plugin and is responsible for returning the posts that match the search query.
+  * Only the post IDs are taken from the search response; the posts themselves are loaded with a secondary
+  * WP_Query so that their content comes from the local database.
+  *
+  * @module search
+  *
+  * @param array $posts Current array of posts (still pre-query)
+  * @param WP_Query $query The WP_Query being filtered
+  *
+  * @return array Array of matching posts, or the unmodified $posts when the query is not handled here
+  */
+ public function filter__posts_pre_query( $posts, $query ) {
+     if ( ! $query->is_main_query() || ! $query->is_search() ) {
+         return $posts;
+     }
+
+     $this->do_search( $query );
+
+     if ( ! is_array( $this->search_result ) ) {
+         return $posts;
+     }
+
+     // If no results, nothing to do. This also covers a decoded response that lacks the 'results'/'hits' keys,
+     // which would otherwise be passed to count() as null.
+     if ( empty( $this->search_result['results']['hits'] ) || ! is_array( $this->search_result['results']['hits'] ) ) {
+         return array();
+     }
+
+     $post_ids = array();
+
+     foreach ( $this->search_result['results']['hits'] as $result ) {
+         if ( isset( $result['fields']['post_id'] ) ) {
+             $post_ids[] = (int) $result['fields']['post_id'];
+         }
+     }
+
+     // An empty post__in would remove the restriction entirely and return unrelated posts
+     if ( empty( $post_ids ) ) {
+         return array();
+     }
+
+     // Query all posts now
+     $args = array(
+         'post__in'            => $post_ids,
+         // Keep the order returned by the search API rather than WP_Query's default date ordering
+         'orderby'             => 'post__in',
+         // Load every hit, regardless of the site's default page size
+         'posts_per_page'      => count( $post_ids ),
+         // Do not let sticky posts be moved or injected into the search results
+         'ignore_sticky_posts' => true,
+         'perm'                => 'readable',
+         'post_type'           => 'any',
+     );
+
+     $posts_query = new WP_Query( $args );
+
+     // WP Core doesn't call the set_found_posts and its filters when filtering posts_pre_query like we do, so need to
+     // do these manually
+     $query->found_posts = $this->found_posts;
+
+     $posts_per_page = (int) $query->get( 'posts_per_page' );
+
+     // Only compute the page count for a positive page size; 0 would divide by zero and -1 would go negative
+     if ( $posts_per_page > 0 ) {
+         $query->max_num_pages = ceil( $this->found_posts / $posts_per_page );
+     }
+
+     return $posts_query->posts;
+ }
+
+ /**
+  * Build up the search, then run it against the Jetpack servers
+  *
+  * The outcome is stored on the object: the raw response in $search_result and the (capped) total in $found_posts.
+  * The query is marked as a 404 - and no request is made - when the requested page lies beyond the maximum offset,
+  * or when no searchable post type remains.
+  *
+  * @param WP_Query $query The original WP_Query to use for the parameters of our search
+  */
+ public function do_search( WP_Query $query ) {
+     if ( ! $query->is_main_query() || ! $query->is_search() ) {
+         return;
+     }
+
+     $page = ( $query->get( 'paged' ) ) ? absint( $query->get( 'paged' ) ) : 1;
+
+     $posts_per_page = $query->get( 'posts_per_page' );
+
+     // ES API does not allow more than 15 results at a time
+     if ( $posts_per_page > 15 ) {
+         $posts_per_page = 15;
+     }
+
+     // Start building the WP-style search query args
+     // They'll be translated to ES format args later
+     $es_wp_query_args = array(
+         'query'          => $query->get( 's' ),
+         'posts_per_page' => $posts_per_page,
+         'paged'          => $page,
+         'orderby'        => $query->get( 'orderby' ),
+         'order'          => $query->get( 'order' ),
+     );
+
+     if ( ! empty( $this->aggregations ) ) {
+         $es_wp_query_args['aggregations'] = $this->aggregations;
+     }
+
+     // Did we query for authors?
+     if ( $query->get( 'author_name' ) ) {
+         $es_wp_query_args['author_name'] = $query->get( 'author_name' );
+     }
+
+     $es_wp_query_args['post_type'] = $this->get_es_wp_query_post_type_for_query( $query );
+
+     $es_wp_query_args['terms'] = $this->get_es_wp_query_terms_for_query( $query );
+
+     /**
+      * Modify the search query parameters, such as controlling the post_type.
+      *
+      * These arguments are in the format of WP_Query arguments
+      *
+      * @module search
+      *
+      * @since 5.0.0
+      *
+      * @param array $es_wp_query_args The current query args, in WP_Query format
+      * @param WP_Query $query The original query object
+      */
+     $es_wp_query_args = apply_filters( 'jetpack_search_es_wp_query_args', $es_wp_query_args, $query );
+
+     // If page * posts_per_page is greater than our max offset, send a 404. This is necessary because the offset is
+     // capped at $this->max_offset, so a high page would always return the last page of results otherwise
+     if ( ( $es_wp_query_args['paged'] * $es_wp_query_args['posts_per_page'] ) > $this->max_offset ) {
+         $query->set_404();
+
+         return;
+     }
+
+     // If there were no post types returned, then 404 to avoid querying against non-public post types, which could
+     // happen if we don't add the post type restriction to the ES query
+     if ( empty( $es_wp_query_args['post_type'] ) ) {
+         $query->set_404();
+
+         return;
+     }
+
+     // Convert the WP-style args into ES args
+     $es_query_args = $this->convert_wp_es_to_es_args( $es_wp_query_args );
+
+     // Only trust ES to give us IDs, not the content since it is a mirror
+     $es_query_args['fields'] = array(
+         'post_id',
+     );
+
+     /**
+      * Modify the underlying ES query that is passed to the search endpoint. The returned args must represent a valid ES query
+      *
+      * This filter is harder to use if you're unfamiliar with ES, but allows complete control over the query
+      *
+      * @module search
+      *
+      * @since 5.0.0
+      *
+      * @param array $es_query_args The raw ES query args
+      * @param WP_Query $query The original query object
+      */
+     $es_query_args = apply_filters( 'jetpack_search_es_query_args', $es_query_args, $query );
+
+     // Do the actual search query!
+     $this->search_result = $this->search( $es_query_args );
+
+     if ( is_wp_error( $this->search_result ) || ! is_array( $this->search_result ) || empty( $this->search_result['results'] ) || empty( $this->search_result['results']['hits'] ) ) {
+         $this->found_posts = 0;
+
+         return;
+     }
+
+     $results = $this->search_result['results'];
+
+     // The response is not guaranteed to carry a 'total'; fall back to the number of hits actually returned
+     $total = isset( $results['total'] ) ? (int) $results['total'] : count( (array) $results['hits'] );
+
+     // Total number of results for paging purposes. Capped at $this->max_offset + $posts_per_page, as deep paging
+     // gets quite expensive
+     $this->found_posts = min( $total, $this->max_offset + $posts_per_page );
+ }
+
+ /**
+  * Given a WP_Query, convert its WP_Tax_Query (if present) into the WP-style ES term arguments for the search
+  *
+  * Only first-level clauses that match on the 'slug' field are converted. The 'relation' entry, nested clause
+  * groups and clauses matching on any other field are skipped.
+  *
+  * @module search
+  *
+  * @param WP_Query $query The original WP_Query object for which to parse the taxonomy query
+  *
+  * @return array The new WP-style ES arguments (that will be converted into 'real' ES arguments),
+  *               as taxonomy name => list of term slugs
+  */
+ public function get_es_wp_query_terms_for_query( WP_Query $query ) {
+     $args = array();
+
+     $the_tax_query = $query->tax_query;
+
+     if ( ! $the_tax_query ) {
+         return $args;
+     }
+
+     if ( ! $the_tax_query instanceof WP_Tax_Query || empty( $the_tax_query->queried_terms ) || ! is_array( $the_tax_query->queried_terms ) ) {
+         return $args;
+     }
+
+     foreach ( $the_tax_query->queries as $tax_query ) {
+         // The list can also hold the scalar 'relation' entry and nested clause groups, neither of which has
+         // the keys of a first-order clause, so skip anything that does not look like one
+         if ( ! is_array( $tax_query ) || ! isset( $tax_query['field'], $tax_query['taxonomy'], $tax_query['terms'] ) ) {
+             continue;
+         }
+
+         // Right now we only support clauses that match on term slugs
+         if ( 'slug' !== $tax_query['field'] ) {
+             continue;
+         }
+
+         $taxonomy = $tax_query['taxonomy'];
+
+         if ( ! isset( $args[ $taxonomy ] ) || ! is_array( $args[ $taxonomy ] ) ) {
+             $args[ $taxonomy ] = array();
+         }
+
+         // Cast so that a single slug given as a string can still be merged
+         $args[ $taxonomy ] = array_merge( $args[ $taxonomy ], (array) $tax_query['terms'] );
+     }
+
+     return $args;
+ }
+
+ /**
+  * Work out which post types a WP_Query is asking for, keeping only the searchable ones
+  *
+  * A post type is dropped when it is empty, not registered, or registered with 'exclude_from_search'.
+  *
+  * @module search
+  *
+  * @param WP_Query $query Original WP_Query object
+  *
+  * @return array Array of searchable post types corresponding to the original query
+  */
+ public function get_es_wp_query_post_type_for_query( WP_Query $query ) {
+     $requested = $query->get( 'post_type' );
+
+     // 'any' is expanded to the list of post types that are not excluded from search
+     if ( 'any' === $requested ) {
+         $searchable_args = array(
+             'exclude_from_search' => false,
+         );
+
+         $requested = array_values( get_post_types( $searchable_args ) );
+     }
+
+     if ( ! is_array( $requested ) ) {
+         $requested = array( $requested );
+     }
+
+     $allowed = array();
+
+     foreach ( array_unique( $requested ) as $candidate ) {
+         if ( $candidate ) {
+             $type_object = get_post_type_object( $candidate );
+
+             // Keep only registered post types that take part in search
+             if ( $type_object && ! $type_object->exclude_from_search ) {
+                 $allowed[] = $candidate;
+             }
+         }
+     }
+
+     return $allowed;
+ }
+
+ /**
+  * Load and register the widgets that belong to the Search module
+  *
+  * @module search
+  */
+ public function action__widgets_init() {
+     $widget_file = dirname( __FILE__ ) . '/class.jetpack-search-widget-filters.php';
+
+     require_once $widget_file;
+
+     register_widget( 'Jetpack_Search_Widget_Filters' );
+ }
+
+ /**
+  * Get the result of the last search
+  *
+  * @module search
+  *
+  * @param bool $raw If true, the stored value is returned as is (it may be a WP_Error); otherwise only its
+  *                  'results' entry is returned
+  *
+  * @return array|bool The search results, or false if there was a failure
+  */
+ public function get_search_result( $raw = false ) {
+     if ( $raw ) {
+         return $this->search_result;
+     }
+
+     $stored = $this->search_result;
+
+     // Anything other than a non-empty array carrying a non-empty 'results' entry counts as a failure
+     if ( empty( $stored ) || is_wp_error( $stored ) || ! is_array( $stored ) || empty( $stored['results'] ) ) {
+         return false;
+     }
+
+     return $stored['results'];
+ }
+
+ /**
+  * Translate the year/monthnum/day query vars of a WP_Query into a 'date_range' search argument
+  *
+  * The range spans the whole year, the whole month or the single day, depending on which vars are set.
+  * Without a year the arguments are returned unchanged.
+  *
+  * @param array $es_wp_query_args
+  * @param WP_Query $query The original WP_Query
+  *
+  * @return array The es wp query args, with date filters added (as needed)
+  */
+ public function filter__add_date_filter_to_query( array $es_wp_query_args, WP_Query $query ) {
+     $year = $query->get( 'year' );
+
+     if ( ! $year ) {
+         return $es_wp_query_args;
+     }
+
+     $monthnum = $query->get( 'monthnum' );
+
+     if ( ! $monthnum ) {
+         // Year only: cover January 1st through December 31st
+         $range_start = $year . '-01-01 00:00:00';
+         $range_end   = $year . '-12-31 23:59:59';
+     } else {
+         // Zero-pad the month
+         $padded_month = sprintf( '%02d', $monthnum );
+
+         $day = $query->get( 'day' );
+
+         if ( $day ) {
+             // Zero-pad the day
+             $padded_day = sprintf( '%02d', $day );
+
+             $range_start = $year . '-' . $padded_month . '-' . $padded_day . ' 00:00:00';
+             $range_end   = $year . '-' . $padded_month . '-' . $padded_day . ' 23:59:59';
+         } else {
+             // 14 = middle of the month so no chance of DST issues
+             $last_day = date( 't', mktime( 0, 0, 0, $monthnum, 14, $year ) );
+
+             $range_start = $year . '-' . $padded_month . '-01 00:00:00';
+             $range_end   = $year . '-' . $padded_month . '-' . $last_day . ' 23:59:59';
+         }
+     }
+
+     $es_wp_query_args['date_range'] = array(
+         'field' => 'date',
+         'gte'   => $range_start,
+         'lte'   => $range_end,
+     );
+
+     return $es_wp_query_args;
+ }
+
+ /**
+  * Converts WP_Query style args to ES args
+  *
+  * The supported arguments and their defaults are listed in $defaults below. The result always contains
+  * 'blog_id', 'size', 'from' and 'query'; 'sort', 'filter' and 'aggregations' are only present when applicable.
+  *
+  * @module search
+  *
+  * @param array $args Array of WP_Query style arguments
+  *
+  * @return array Array of ES style query arguments
+  */
+ public function convert_wp_es_to_es_args( array $args ) {
+     jetpack_require_lib( 'jetpack-wpes-query-builder' );
+
+     $builder = new Jetpack_WPES_Query_Builder();
+
+     $defaults = array(
+         'blog_id' => get_current_blog_id(),
+
+         'query' => null, // Search phrase
+         'query_fields' => array( 'title', 'content', 'author', 'tag', 'category' ),
+
+         'post_type' => null, // string or an array
+         'terms' => array(), // ex: array( 'taxonomy-1' => array( 'slug' ), 'taxonomy-2' => array( 'slug-a', 'slug-b' ) )
+
+         'author' => null, // id or an array of ids
+         'author_name' => array(), // string or an array
+
+         'date_range' => null, // array( 'field' => 'date', 'gt' => 'YYYY-MM-dd', 'lte' => 'YYYY-MM-dd' ); date formats: 'YYYY-MM-dd' or 'YYYY-MM-dd HH:MM:SS'
+
+         'orderby' => null, // Defaults to 'relevance' if query is set, otherwise 'date'. Pass an array for multiple orders.
+         'order' => 'DESC',
+
+         'posts_per_page' => 10,
+
+         'offset' => null,
+         'paged' => null,
+
+         /**
+          * Aggregations. Examples:
+          * array(
+          *     'Tag' => array( 'type' => 'taxonomy', 'taxonomy' => 'post_tag', 'count' => 10 ) ),
+          *     'Post Type' => array( 'type' => 'post_type', 'count' => 10 ) ),
+          * );
+          */
+         'aggregations' => null,
+     );
+
+     $args = wp_parse_args( $args, $defaults );
+
+     $es_query_args = array(
+         'blog_id' => absint( $args['blog_id'] ),
+         'size'    => absint( $args['posts_per_page'] ),
+         // Start at the first result unless an offset or page is given below; without this default the
+         // min() call further down would read a key that was never set
+         'from'    => 0,
+     );
+
+     // ES "from" arg (offset)
+     if ( $args['offset'] ) {
+         $es_query_args['from'] = absint( $args['offset'] );
+     } elseif ( $args['paged'] ) {
+         $es_query_args['from'] = max( 0, ( absint( $args['paged'] ) - 1 ) * $es_query_args['size'] );
+     }
+
+     // Limit the offset to $this->max_offset posts, as deep pages get exponentially slower
+     // See https://www.elastic.co/guide/en/elasticsearch/guide/current/pagination.html
+     $es_query_args['from'] = min( $es_query_args['from'], $this->max_offset );
+
+     if ( ! is_array( $args['author_name'] ) ) {
+         $args['author_name'] = array( $args['author_name'] );
+     }
+
+     // ES stores usernames, not IDs, so transform
+     if ( ! empty( $args['author'] ) ) {
+         if ( ! is_array( $args['author'] ) ) {
+             $args['author'] = array( $args['author'] );
+         }
+
+         foreach ( $args['author'] as $author ) {
+             $user = get_user_by( 'id', $author );
+
+             if ( $user && ! empty( $user->user_login ) ) {
+                 $args['author_name'][] = $user->user_login;
+             }
+         }
+     }
+
+     //////////////////////////////////////////////////
+     // Build the filters from the query elements.
+     // Filters rock because they are cached from one query to the next
+     // but they are cached as individual filters, rather than all combined together.
+     // May get performance boost by also caching the top level boolean filter too.
+     $filters = array();
+
+     if ( $args['post_type'] ) {
+         if ( ! is_array( $args['post_type'] ) ) {
+             $args['post_type'] = array( $args['post_type'] );
+         }
+
+         $filters[] = array(
+             'terms' => array(
+                 'post_type' => $args['post_type'],
+             ),
+         );
+     }
+
+     if ( $args['author_name'] ) {
+         $filters[] = array(
+             'terms' => array(
+                 'author_login' => $args['author_name'],
+             ),
+         );
+     }
+
+     if ( ! empty( $args['date_range'] ) && isset( $args['date_range']['field'] ) ) {
+         $field = $args['date_range']['field'];
+
+         // What remains after removing 'field' are the range bounds (gt/gte/lt/lte)
+         unset( $args['date_range']['field'] );
+
+         $filters[] = array(
+             'range' => array(
+                 $field => $args['date_range'],
+             ),
+         );
+     }
+
+     if ( is_array( $args['terms'] ) ) {
+         foreach ( $args['terms'] as $tax => $terms ) {
+             $terms = (array) $terms;
+
+             if ( count( $terms ) && mb_strlen( $tax ) ) {
+                 switch ( $tax ) {
+                     case 'post_tag':
+                         $tax_fld = 'tag.slug';
+
+                         break;
+
+                     case 'category':
+                         $tax_fld = 'category.slug';
+
+                         break;
+
+                     default:
+                         $tax_fld = 'taxonomy.' . $tax . '.slug';
+
+                         break;
+                 }
+
+                 // One 'term' filter per slug, so every requested term has to match
+                 foreach ( $terms as $term ) {
+                     $filters[] = array(
+                         'term' => array(
+                             $tax_fld => $term,
+                         ),
+                     );
+                 }
+             }
+         }
+     }
+
+     if ( $args['query'] ) {
+         $query = array(
+             'multi_match' => array(
+                 'query'    => $args['query'],
+                 'fields'   => $args['query_fields'],
+                 'operator' => 'and',
+                 'type'     => 'cross_fields',
+             ),
+         );
+
+         $builder->add_query( $query );
+
+         Jetpack_Search::score_query_by_recency( $builder );
+
+         if ( ! $args['orderby'] ) {
+             $args['orderby'] = array( 'relevance' );
+         }
+     } else {
+         if ( ! $args['orderby'] ) {
+             $args['orderby'] = array( 'date' );
+         }
+     }
+
+     // Validate the "order" field
+     switch ( strtolower( $args['order'] ) ) {
+         case 'asc':
+             $args['order'] = 'asc';
+             break;
+
+         case 'desc':
+         default:
+             $args['order'] = 'desc';
+             break;
+     }
+
+     $es_query_args['sort'] = array();
+
+     foreach ( (array) $args['orderby'] as $orderby ) {
+         // Translate orderby from WP field to ES field; unsupported values are ignored
+         switch ( $orderby ) {
+             case 'relevance' :
+                 // never order by score ascending
+                 $es_query_args['sort'][] = array(
+                     '_score' => array(
+                         'order' => 'desc',
+                     ),
+                 );
+
+                 break;
+
+             case 'date' :
+                 $es_query_args['sort'][] = array(
+                     'date' => array(
+                         'order' => $args['order'],
+                     ),
+                 );
+
+                 break;
+
+             case 'ID' :
+                 $es_query_args['sort'][] = array(
+                     'id' => array(
+                         'order' => $args['order'],
+                     ),
+                 );
+
+                 break;
+
+             case 'author' :
+                 $es_query_args['sort'][] = array(
+                     'author.raw' => array(
+                         'order' => $args['order'],
+                     ),
+                 );
+
+                 break;
+         } // End switch().
+     } // End foreach().
+
+     if ( empty( $es_query_args['sort'] ) ) {
+         unset( $es_query_args['sort'] );
+     }
+
+     if ( ! empty( $filters ) && is_array( $filters ) ) {
+         foreach ( $filters as $filter ) {
+             $builder->add_filter( $filter );
+         }
+
+         $es_query_args['filter'] = $builder->build_filter();
+     }
+
+     $es_query_args['query'] = $builder->build_query();
+
+     // Aggregations
+     if ( ! empty( $args['aggregations'] ) ) {
+         $this->add_aggregations_to_es_query_builder( $args['aggregations'], $builder );
+
+         $es_query_args['aggregations'] = $builder->build_aggregation();
+     }
+
+     return $es_query_args;
+ }
+
+ /**
+  * Register every configured aggregation on the query builder, dispatching on the aggregation's 'type'
+  *
+  * Types other than 'taxonomy', 'post_type' and 'date_histogram' are ignored.
+  *
+  * @module search
+  *
+  * @param array $aggregations Array of Aggregations (filters) to add to the Jetpack_WPES_Query_Builder
+  *
+  * @param Jetpack_WPES_Query_Builder $builder The builder instance that is creating the ES query
+  */
+ public function add_aggregations_to_es_query_builder( array $aggregations, Jetpack_WPES_Query_Builder $builder ) {
+     foreach ( $aggregations as $label => $aggregation ) {
+         $type = $aggregation['type'];
+
+         // Loose comparisons on purpose, matching how a switch statement compares its cases
+         if ( 'taxonomy' == $type ) {
+             $this->add_taxonomy_aggregation_to_es_query_builder( $aggregation, $label, $builder );
+         } elseif ( 'post_type' == $type ) {
+             $this->add_post_type_aggregation_to_es_query_builder( $aggregation, $label, $builder );
+         } elseif ( 'date_histogram' == $type ) {
+             $this->add_date_histogram_aggregation_to_es_query_builder( $aggregation, $label, $builder );
+         }
+     }
+ }
+
+ /**
+  * Register a single taxonomy aggregation on the query builder
+  *
+  * Tags and categories have dedicated fields in the index ('tag', 'category'); every other taxonomy is
+  * addressed as 'taxonomy.<name>'. The aggregation runs on the '.slug' sub-field and its size is capped
+  * at $max_aggregations_count.
+  *
+  * @module search
+  *
+  * @param array $aggregation The aggregation to add to the query builder
+  * @param string $label The 'label' (unique id) for this aggregation
+  * @param Jetpack_WPES_Query_Builder $builder The builder instance that is creating the ES query
+  */
+ public function add_taxonomy_aggregation_to_es_query_builder( array $aggregation, $label, Jetpack_WPES_Query_Builder $builder ) {
+     $taxonomy = $aggregation['taxonomy'];
+
+     // Loose comparisons on purpose, matching how a switch statement compares its cases
+     if ( 'post_tag' == $taxonomy ) {
+         $base_field = 'tag';
+     } elseif ( 'category' == $taxonomy ) {
+         $base_field = 'category';
+     } else {
+         $base_field = 'taxonomy.' . $aggregation['taxonomy'];
+     }
+
+     $terms_aggregation = array(
+         'field' => $base_field . '.slug',
+         'size'  => min( (int) $aggregation['count'], $this->max_aggregations_count ),
+     );
+
+     $builder->add_aggs( $label, array(
+         'terms' => $terms_aggregation,
+     ) );
+ }
+
+ /**
+  * Register a single post_type aggregation on the query builder
+  *
+  * The aggregation runs on the 'post_type' field and its size is capped at $max_aggregations_count.
+  *
+  * @module search
+  *
+  * @param array $aggregation The aggregation to add to the query builder
+  * @param string $label The 'label' (unique id) for this aggregation
+  * @param Jetpack_WPES_Query_Builder $builder The builder instance that is creating the ES query
+  */
+ public function add_post_type_aggregation_to_es_query_builder( array $aggregation, $label, Jetpack_WPES_Query_Builder $builder ) {
+     $bucket_limit = min( (int) $aggregation['count'], $this->max_aggregations_count );
+
+     $definition = array(
+         'terms' => array(
+             'field' => 'post_type',
+             'size'  => $bucket_limit,
+         ),
+     );
+
+     $builder->add_aggs( $label, $definition );
+ }
+
+ /**
+  * Register a single date_histogram aggregation on the query builder
+  *
+  * The histogram runs on 'date_gmt' when the aggregation asks for 'post_date_gmt', and on 'date' otherwise.
+  *
+  * @module search
+  *
+  * @param array $aggregation The aggregation to add to the query builder
+  * @param string $label The 'label' (unique id) for this aggregation
+  * @param Jetpack_WPES_Query_Builder $builder The builder instance that is creating the ES query
+  */
+ public function add_date_histogram_aggregation_to_es_query_builder( array $aggregation, $label, Jetpack_WPES_Query_Builder $builder ) {
+     $interval = $aggregation['interval'];
+
+     $date_field = 'date';
+
+     if ( ! empty( $aggregation['field'] ) && 'post_date_gmt' == $aggregation['field'] ) {
+         $date_field = 'date_gmt';
+     }
+
+     $builder->add_aggs( $label, array(
+         'date_histogram' => array(
+             'interval' => $interval,
+             'field'    => $date_field,
+         ),
+     ) );
+ }
+
+ /**
+  * Combine an existing filter with a list of additional filters using a logical AND.
+  *
+  * Attempts to optimize the result somewhat: an existing 'match_all' filter is dropped, and when that
+  * leaves exactly one new filter, that filter is returned on its own rather than wrapped in an 'and'.
+  *
+  * @module search
+  *
+  * @param array $curr_filter The existing filters to build upon
+  * @param array $filters The new filters to add
+  *
+  * @return array The resulting merged filters
+  */
+ public static function and_es_filters( array $curr_filter, array $filters ) {
+     $keeps_current = is_array( $curr_filter ) && ! isset( $curr_filter['match_all'] );
+
+     if ( $keeps_current ) {
+         return array(
+             'and' => array_merge( array( $curr_filter ), $filters ),
+         );
+     }
+
+     // The current filter matches everything, so only the new filters matter
+     return ( 1 === count( $filters ) ) ? $filters[0] : array( 'and' => $filters );
+ }
+
+ /**
+  * Make newer posts score slightly higher, by adding a decay function to the given query builder
+  *
+  * A gauss decay on the 'date_gmt' field is used, with today's date as the origin, a scale of 360 days
+  * and a decay of 0.9.
+  *
+  * @module search
+  *
+  * @param Jetpack_WPES_Query_Builder $builder The Jetpack_WPES_Query_Builder to add the recency score to
+  *
+  * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html#function-decay
+  */
+ public static function score_query_by_recency( Jetpack_WPES_Query_Builder &$builder ) {
+     $recency_decay = array(
+         'date_gmt' => array(
+             'origin' => date( 'Y-m-d' ),
+             'scale'  => '360d',
+             'decay'  => 0.9,
+         ),
+     );
+
+     $builder->add_decay( 'gauss', $recency_decay );
+ }
+
+ /**
+  * Configure which filters are available for the search
+  *
+  * The filters are rendered by the Jetpack_Search_Widget_Filters() widget and are implemented as
+  * Elasticsearch Aggregations behind the scenes.
+  *
+  * If you do not need to know how many documents match each filter, please consider using regular WP Query
+  * arguments instead, such as via the jetpack_search_es_wp_query_args filter
+  *
+  * @module search
+  *
+  * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html
+  *
+  * @param array $aggregations Array of filters (aggregations) to apply to the search
+  */
+ public function set_filters( array $aggregations ) {
+     // Stored as given; the entries are turned into an ES query when the search is built
+     $this->aggregations = $aggregations;
+ }
+
+ /**
+  * Deprecated alias of set_filters()
+  *
+  * Reports the deprecation, then forwards the given facets to set_filters().
+  *
+  * @module search
+  *
+  * @deprecated 5.0 Please use Jetpack_Search::set_filters() instead
+  *
+  * @see Jetpack_Search::set_filters()
+  *
+  * @param array $facets Array of facets to apply to the search
+  */
+ public function set_facets( array $facets ) {
+     $replacement = 'Jetpack_Search::set_filters()';
+
+     _deprecated_function( __METHOD__, 'jetpack-5.0', $replacement );
+
+     $this->set_filters( $facets );
+ }
+
+ /**
+  * Get the raw Aggregation results from the ES response
+  *
+  * An empty array is returned when there is no usable search result, or when it carries no aggregations.
+  *
+  * @module search
+  *
+  * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html
+  *
+  * @return array Array of Aggregations performed on the search
+  */
+ public function get_search_aggregations_results() {
+     $results = $this->get_search_result();
+
+     if ( empty( $results ) || empty( $results['aggregations'] ) ) {
+         return array();
+     }
+
+     return $results['aggregations'];
+ }
+
+ /**
+  * Deprecated alias of get_search_aggregations_results()
+  *
+  * Reports the deprecation, then returns whatever get_search_aggregations_results() returns.
+  *
+  * @module search
+  *
+  * @deprecated 5.0 Please use Jetpack_Search::get_search_aggregations_results() instead
+  *
+  * @see Jetpack_Search::get_search_aggregations_results()
+  *
+  * @return array Array of Facets performed on the search
+  */
+ public function get_search_facets() {
+     $replacement = 'Jetpack_Search::get_search_aggregations_results()';
+
+     _deprecated_function( __METHOD__, 'jetpack-5.0', $replacement );
+
+     return $this->get_search_aggregations_results();
+ }
+
+ /**
+ * Get the results of the Filters performed, including the number of matching documents
+ *
+ * Returns an array of Filters (keyed by $label, as passed to Jetpack_Search::set_filters()), containing the Filter and all resulting
+ * matching buckets, the url for applying/removing each bucket, etc.
+ *
+ * NOTE - if this is called before the search is performed, an empty array will be returned. Use the $aggregations class
+ * member if you need to access the raw filters set in Jetpack_Search::set_filters()
+ *
+ * @module search
+ *
+ * @param WP_Query $query The optional original WP_Query to use for determining which filters are active. Defaults to the main query
+ *
+ * @return array Array of Filters applied and info about them
+ */
+ public function get_filters( WP_Query $query = null ) {
+ if ( ! $query instanceof WP_Query ) {
+ global $wp_query;
+
+ $query = $wp_query;
+ }
+
+ $aggregation_data = $this->aggregations;
+
+ if ( empty( $aggregation_data ) ) {
+ return $aggregation_data;
+ }
+
+ $aggregation_results = $this->get_search_aggregations_results();
+
+ if ( ! $aggregation_results ) {
+ return $aggregation_data;
+ }
+
+ // NOTE - Looping over the _results_, not the original configured aggregations, so we get the 'real' data from ES
+ foreach ( $aggregation_results as $label => $aggregation ) {
+ if ( empty( $aggregation ) ) {
+ continue;
+ }
+
+ $type = $this->aggregations[ $label ]['type'];
+
+ $aggregation_data[ $label ]['buckets'] = array();
+
+ $existing_term_slugs = array();
+
+ $tax_query_var = null;
+
+ // Figure out which terms are active in the query, for this taxonomy
+ if ( 'taxonomy' === $this->aggregations[ $label ]['type'] ) {
+ $tax_query_var = $this->get_taxonomy_query_var( $this->aggregations[ $label ]['taxonomy'] );
+
+ if ( ! empty( $query->tax_query ) && ! empty( $query->tax_query->queries ) && is_array( $query->tax_query->queries ) ) {
+ foreach( $query->tax_query->queries as $tax_query ) {
+ if ( $this->aggregations[ $label ]['taxonomy'] === $tax_query['taxonomy'] &&
+ 'slug' === $tax_query['field'] &&
+ is_array( $tax_query['terms'] ) ) {
+ $existing_term_slugs = array_merge( $existing_term_slugs, $tax_query['terms'] );
+ }
+ }
+ }
+ }
+
+ // Now take the resulting found aggregation items and generate the additional info about them, such as
+ // activation/deactivation url, name, count, etc
+ $buckets = array();
+
+ if ( ! empty( $aggregation['buckets'] ) ) {
+ $buckets = (array) $aggregation['buckets'];
+ }
+
+ // Some aggregation types like date_histogram don't support the max results parameter
+ if ( is_int( $this->aggregations[ $label ]['count'] ) && count( $buckets ) > $this->aggregations[ $label ]['count'] ) {
+ $buckets = array_slice( $buckets, 0, $this->aggregations[ $label ]['count'] );
+ }
+
+ foreach ( $buckets as $item ) {
+ $query_vars = array();
+ $active = false;
+ $remove_url = null;
+ $name = '';
+
+ // What type was the original aggregation?
+ switch ( $type ) {
+ case 'taxonomy':
+ $taxonomy = $this->aggregations[ $label ]['taxonomy'];
+
+ $term = get_term_by( 'slug', $item['key'], $taxonomy );
+
+ if ( ! $term || ! $tax_query_var ) {
+ continue 2; // switch() is considered a looping structure
+ }
+
+ $query_vars = array(
+ $tax_query_var => implode( '+', array_merge( $existing_term_slugs, array( $term->slug ) ) ),
+ );
+
+ $name = $term->name;
+
+ // Let's determine if this term is active or not
+
+ if ( in_array( $item['key'], $existing_term_slugs, true ) ) {
+ $active = true;
+
+ $slug_count = count( $existing_term_slugs );
+
+ if ( $slug_count > 1 ) {
+ $remove_url = add_query_arg( $tax_query_var, urlencode( implode( '+', array_diff( $existing_term_slugs, array( $item['key'] ) ) ) ) );
+ } else {
+ $remove_url = remove_query_arg( $tax_query_var );
+ }
+ }
+
+ break;
+
+ case 'post_type':
+ $post_type = get_post_type_object( $item['key'] );
+
+ if ( ! $post_type || $post_type->exclude_from_search ) {
+ continue 2; // switch() is considered a looping structure
+ }
+
+ $query_vars = array(
+ 'post_type' => $item['key'],
+ );
+
+ $name = $post_type->labels->singular_name;
+
+ // Is this post type active on this search?
+ $post_types = $query->get( 'post_type' );
+
+ if ( ! is_array( $post_types ) ) {
+ $post_types = array( $post_types );
+ }
+
+ if ( in_array( $item['key'], $post_types ) ) {
+ $active = true;
+
+ $post_type_count = count( $post_types );
+
+ // For the right 'remove filter' url, we need to remove the post type from the array, or remove the param entirely if it's the only one
+ if ( $post_type_count > 1 ) {
+ $remove_url = add_query_arg( 'post_type', urlencode_deep( array_diff( $post_types, array( $item['key'] ) ) ) );
+ } else {
+ $remove_url = remove_query_arg( 'post_type' );
+ }
+ }
+
+ break;
+
+ case 'date_histogram':
+ $timestamp = $item['key'] / 1000;
+
+ $current_year = $query->get( 'year' );
+ $current_month = $query->get( 'monthnum' );
+ $current_day = $query->get( 'day' );
+
+ switch ( $this->aggregations[ $label ]['interval'] ) {
+ case 'year':
+ $year = (int) date( 'Y', $timestamp );
+
+ $query_vars = array(
+ 'year' => $year,
+ 'monthnum' => false,
+ 'day' => false,
+ );
+
+ $name = $year;
+
+ // Is this year currently selected?
+ if ( ! empty( $current_year ) && (int) $current_year === $year ) {
+ $active = true;
+
+ $remove_url = remove_query_arg( array( 'year', 'monthnum', 'day' ) );
+ }
+
+ break;
+
+ case 'month':
+ $year = (int) date( 'Y', $timestamp );
+ $month = (int) date( 'n', $timestamp );
+
+ $query_vars = array(
+ 'year' => $year,
+ 'monthnum' => $month,
+ 'day' => false,
+ );
+
+ $name = date( 'F Y', $timestamp );
+
+ // Is this month currently selected?
+ if ( ! empty( $current_year ) && (int) $current_year === $year &&
+ ! empty( $current_month ) && (int) $current_month === $month ) {
+ $active = true;
+
+ $remove_url = remove_query_arg( array( 'monthnum', 'day' ) );
+ }
+
+ break;
+
+ case 'day':
+ $year = (int) date( 'Y', $timestamp );
+ $month = (int) date( 'n', $timestamp );
+ $day = (int) date( 'j', $timestamp );
+
+ $query_vars = array(
+ 'year' => $year,
+ 'monthnum' => $month,
+ 'day' => $day,
+ );
+
+ $name = date( 'F jS, Y', $timestamp );
+
+ // Is this day currently selected?
+ if ( ! empty( $current_year ) && (int) $current_year === $year &&
+ ! empty( $current_month ) && (int) $current_month === $month &&
+ ! empty( $current_day ) && (int) $current_day === $day ) {
+ $active = true;
+
+ $remove_url = remove_query_arg( array( 'day' ) );
+ }
+
+ break;
+
+ default:
+ continue 3; // switch() is considered a looping structure
+ } // End switch().
+
+ break;
+
+ default:
+ //continue 2; // switch() is considered a looping structure
+ } // End switch().
+
+ // Need to urlencode param values since add_query_arg doesn't
+ $url_params = urlencode_deep( $query_vars );
+
+ $aggregation_data[ $label ]['buckets'][] = array(
+ 'url' => add_query_arg( $url_params ),
+ 'query_vars' => $query_vars,
+ 'name' => $name,
+ 'count' => $item['doc_count'],
+ 'active' => $active,
+ 'remove_url' => $remove_url,
+ 'type' => $type,
+ 'type_label' => $label,
+ );
+ } // End foreach().
+ } // End foreach().
+
+ return $aggregation_data;
+ }
+
+	/**
+	 * Deprecated accessor for the facet (filter) data of this search
+	 *
+	 * Emits a deprecation notice first and then hands back whatever
+	 * Jetpack_Search::get_filters() returns, unmodified.
+	 *
+	 * @module search
+	 *
+	 * @deprecated 5.0 Please use Jetpack_Search::get_filters() instead
+	 *
+	 * @see Jetpack_Search::get_filters()
+	 *
+	 * @return array $facets The filter data, exactly as returned by Jetpack_Search::get_filters()
+	 */
+	public function get_search_facet_data() {
+		// Flag the call as deprecated before doing any work.
+		_deprecated_function( __METHOD__, 'jetpack-5.0', 'Jetpack_Search::get_filters()' );
+
+		$facets = $this->get_filters();
+
+		return $facets;
+	}
+
+	/**
+	 * Get the filter buckets that are currently active on this search
+	 *
+	 * Walks every filter returned by Jetpack_Search::get_filters() and collects
+	 * the buckets whose 'active' flag is set and truthy.
+	 *
+	 * @module search
+	 *
+	 * @return array Array of the active buckets. Empty when no bucket is active, or when
+	 *               Jetpack_Search::get_filters() does not return an array.
+	 */
+	public function get_active_filter_buckets() {
+		$active_buckets = array();
+
+		$filters = $this->get_filters();
+
+		if ( ! is_array( $filters ) ) {
+			return $active_buckets;
+		}
+
+		foreach ( $filters as $filter ) {
+			// Inspect the individual filter here, not the $filters collection:
+			// testing the collection for a 'buckets' key skipped every filter.
+			if ( ! isset( $filter['buckets'] ) || ! is_array( $filter['buckets'] ) ) {
+				continue;
+			}
+
+			foreach ( $filter['buckets'] as $item ) {
+				// empty() covers both "key missing" and "flag is falsy".
+				if ( ! empty( $item['active'] ) ) {
+					$active_buckets[] = $item;
+				}
+			}
+		}
+
+		return $active_buckets;
+	}
+
+	/**
+	 * Deprecated alias of Jetpack_Search::get_active_filter_buckets()
+	 *
+	 * Triggers a deprecation notice, then returns the result of
+	 * Jetpack_Search::get_active_filter_buckets() untouched.
+	 *
+	 * @module search
+	 *
+	 * @deprecated 5.0 Please use Jetpack_Search::get_active_filter_buckets() instead
+	 *
+	 * @see Jetpack_Search::get_active_filter_buckets()
+	 *
+	 * @return array Array of the filter buckets that are active on this search
+	 */
+	public function get_current_filters() {
+		// Flag the call as deprecated before delegating.
+		_deprecated_function( __METHOD__, 'jetpack-5.0', 'Jetpack_Search::get_active_filter_buckets()' );
+
+		$active_buckets = $this->get_active_filter_buckets();
+
+		return $active_buckets;
+	}
+
+	/**
+	 * Work out which query var represents a given taxonomy
+	 *
+	 * Looks the taxonomy up with get_taxonomy() and passes its query_var through the
+	 * jetpack_search_taxonomy_query_var filter, so custom code can change the GET var
+	 * that is used for that taxonomy.
+	 *
+	 * @module search
+	 *
+	 * @param string $taxonomy_name The name of the taxonomy for which to get the query var
+	 *
+	 * @return bool|string The (filtered) query var to use for this taxonomy, or false when the
+	 *                     taxonomy lookup yields nothing usable
+	 */
+	public function get_taxonomy_query_var( $taxonomy_name ) {
+		$tax_object = get_taxonomy( $taxonomy_name );
+
+		// Only a truthy, non-error lookup result carries a query var worth filtering.
+		if ( $tax_object && ! is_wp_error( $tax_object ) ) {
+			$default_query_var = $tax_object->query_var;
+
+			/**
+			 * Modify the query var to use for a given taxonomy
+			 *
+			 * @module search
+			 *
+			 * @since 5.0.0
+			 *
+			 * @param string $query_var The current query_var for the taxonomy
+			 * @param string $taxonomy_name The taxonomy name
+			 */
+			return apply_filters( 'jetpack_search_taxonomy_query_var', $default_query_var, $taxonomy_name );
+		}
+
+		return false;
+	}
+}