All the crawlers pretend to be normal users and hit the facets, so mosparo, which is a modern form-protection tool, would not do much when crawlers retrieve all the links on a page and then visit them in all possible combinations. So there are two options as far as I can see:
- no links on facets
- prevention before arriving at the server
Thanks for the input.
Seen that, but that is for Facets 3, which has a whole lot of other issues. So we had to revert to version 2, where drupalSettings has information about the facets' AJAX views.
Hi all. I have been toying around with removing links from facets entirely. If there are no links, nothing can be clicked. I am testing it on a few sites now and it works. You might need to tweak the templates a bit. I don't have enough time to create a module and make it super generic, but I hope it helps.
#linkless-facet.js
/**
 * @file
 * Link-less facets: filters, sorting, full-text search and paging are plain
 * form controls / <div> elements instead of anchors. This script turns their
 * state into a Views AJAX GET request and, when the first page is requested,
 * a follow-up POST to /facets-block-ajax that re-renders the facet blocks.
 */
(function ($, Drupal, drupalSettings) {
  // Controls whose "change" event (or Enter key) triggers a results refresh.
  const CONTROL_SELECTOR = '.facets-checkbox, #custom-items-per-page, #custom-sort-by, #custom-sort-order, #custom-search-api-fulltext, #custom-search-api-fulltext-1';
  // Class prefix that carries the facet block id on each facet block wrapper.
  const BLOCK_ID_PREFIX = 'js-facet-block-id-';

  // The delegated handlers live on `document`, so they must be bound once only.
  let handlersBound = false;
  // True while a pager-initiated request is in flight (ignores repeated clicks).
  let pagerRequestPending = false;

  /**
   * Builds the `f[n]=<facet>:<value>` pairs for every checked facet checkbox.
   *
   * Raw attributes are read instead of `.data()`: `.data()` converts "0",
   * "true", "null"... to non-string values, which made a facet value of "0"
   * look empty and silently dropped it.
   *
   * @return {string[]} Already URL-encoded `key=value` pairs.
   */
  function collectSelectedFacets() {
    const selectedFacets = [];
    $('.facets-checkbox:checked').each(function (index) {
      const $checkbox = $(this);
      const facetValue = $checkbox.attr('data-facet-value');
      const facetName = $checkbox
        .closest('.block-facets-ajax')
        .find('.facet-header')
        .attr('data-filter-id');
      if (facetName && facetValue !== undefined && facetValue !== '') {
        selectedFacets.push(`f%5B${index}%5D=${encodeURIComponent(`${facetName}:${facetValue}`)}`);
      }
    });
    return selectedFacets;
  }

  /**
   * Builds the filter query string shared by the GET and POST requests.
   *
   * @param {string[]} selectedFacets Encoded facet pairs.
   * @param {string} itemsPerPage Value of the items-per-page control ('' if unset).
   * @param {number} selectedPage Zero-based page to request.
   * @return {string} Query string without a leading '?' or '&'.
   */
  function buildFilterQuery(selectedFacets, itemsPerPage, selectedPage) {
    const optionalParams = [
      ['sort_by', $('#custom-sort-by').val() || ''],
      ['sort_order', $('#custom-sort-order').val() || ''],
      ['items_per_page', itemsPerPage],
      ['search_api_fulltext', $('#custom-search-api-fulltext').val() || ''],
      ['search_api_fulltext_1', $('#custom-search-api-fulltext-1').val() || ''],
    ];
    const parts = [...selectedFacets];
    optionalParams.forEach(([name, value]) => {
      if (value) {
        parts.push(`${name}=${encodeURIComponent(value)}`);
      }
    });
    parts.push(`page=${selectedPage}`);
    return parts.join('&');
  }

  /**
   * Looks up the DOM id of the AJAX view matching the facet configuration.
   *
   * @param {string} viewName View machine name.
   * @param {string} viewDisplayId View display id.
   * @return {?string} The view_dom_id of the first match, or null.
   */
  function findViewDomId(viewName, viewDisplayId) {
    const ajaxViews = (drupalSettings.views && drupalSettings.views.ajaxViews) || {};
    const match = Object.values(ajaxViews).find(
      (view) => view && view.view_name === viewName && view.view_display_id === viewDisplayId
    );
    return match ? match.view_dom_id : null;
  }

  /**
   * Maps each facet block id (taken from its `js-facet-block-id-*` class) to a
   * '#<element id>' selector, as expected in the `facets_blocks` POST value.
   *
   * @return {Object<string, string>} Block id => selector.
   */
  function collectFacetBlocks() {
    const facetsBlocks = {};
    $('.block-facets-ajax').each(function () {
      const $block = $(this);
      const blockId = ($block.attr('class') || '')
        .split(' ')
        .filter((className) => className.startsWith(BLOCK_ID_PREFIX))
        .map((className) => className.slice(BLOCK_ID_PREFIX.length))
        .join();
      facetsBlocks[blockId] = `#${$block.attr('id')}`;
    });
    return facetsBlocks;
  }

  /**
   * Reads the total number of results from the first `.view-header` whose
   * text mentions "result".
   *
   * NOTE(review): all digit groups in that header are concatenated, which
   * copes with thousands separators ("1,250 results") but presumably breaks
   * on headers such as "1 - 10 of 250 results" - confirm the header format.
   *
   * @param {jQuery} $markup Parsed view markup.
   * @return {?number} Total item count, or null when it cannot be determined.
   */
  function readTotalItems($markup) {
    let totalItems = null;
    $markup.find('.view-header').each(function () {
      const headerText = $(this).text();
      if (headerText.toLowerCase().includes('result')) {
        const numbers = headerText.match(/\d+/g);
        totalItems = numbers ? Number.parseInt(numbers.join(''), 10) : null;
        return false; // Only the first matching header is used.
      }
    });
    return totalItems;
  }

  /**
   * Repairs "last page" pager controls that were rendered with data-page="0".
   *
   * The target page is derived from the result count and the page size. When
   * either is unknown the control is removed instead of being given a bogus
   * target (previously an empty page size produced `Infinity`, and the
   * attribute and raw-HTML replacement disagreed by one).
   *
   * @param {jQuery} $markup Parsed view markup; modified in place.
   * @param {string} itemsPerPage Value of the items-per-page control.
   */
  function repairLastPageLinks($markup, itemsPerPage) {
    const perPage = Number.parseInt(itemsPerPage, 10);
    $markup.find('.pagination').each(function () {
      const $pager = $(this);
      const $lastPageLink = $pager.find('.pager__item--last .page-link');
      if ($lastPageLink.length === 0 || $lastPageLink.attr('data-page') !== '0') {
        return;
      }
      const totalItems = readTotalItems($markup);
      if (!(totalItems > 0) || !(perPage > 0)) {
        $pager.find('.pager__item--last').remove();
        return;
      }
      // Pages are zero-based, so the last page is totalPages - 1.
      $lastPageLink.attr('data-page', Math.ceil(totalItems / perPage) - 1);
    });
  }

  /**
   * Patches the view replacement command(s) of a Views AJAX response so the
   * inserted markup carries a correct last-page target.
   *
   * @param {Object[]} response Drupal AJAX command list; modified in place.
   * @param {string} viewDomId DOM id of the view being refreshed.
   * @param {string} itemsPerPage Value of the items-per-page control.
   */
  function patchViewResponse(response, viewDomId, itemsPerPage) {
    response.forEach((command) => {
      const replacesView = command
        && command.command === 'insert'
        && command.method === 'replaceWith'
        && command.selector
        && command.selector.includes(viewDomId);
      if (!replacesView) {
        return;
      }
      const $markup = $('<div>').append($.parseHTML(command.data));
      repairLastPageLinks($markup, itemsPerPage);
      command.data = $markup.html();
    });
  }

  /**
   * Applies one command of the /facets-block-ajax response: swaps the facet
   * block in the DOM while keeping the id prefix the page already uses.
   *
   * @param {Object} command A Drupal AJAX command.
   */
  function replaceFacetBlock(command) {
    if (!command || command.command !== 'insert' || !command.selector || command.method !== 'replaceWith') {
      return;
    }
    const $facetBlock = $(command.selector);
    if ($facetBlock.length === 0) {
      return;
    }
    if (typeof command.data !== 'string' || !command.data.trim()) {
      console.error("Error: command.data is empty or not a valid string.");
      return;
    }

    // Part of the selector before the '--' suffix.
    const baseId = command.selector.replace(/^#/, '').split('--')[0];
    const $data = $('<div>').html(command.data);
    const $dataBlock = $data.find('[class*="js-facet-block-id-"]').first();
    const originalId = $dataBlock.attr('id');
    if (originalId) {
      // Keep the new '--suffix' (if any) but force the known base id.
      const suffixMatch = originalId.match(/--(.+)$/);
      $dataBlock.attr('id', baseId + (suffixMatch ? `--${suffixMatch[1]}` : ''));
    }

    $facetBlock.replaceWith($data.html());

    // Only re-attach when the new block exists: attachBehaviors(undefined)
    // falls back to `document` and would re-run every document-level behavior.
    const $newFacetBlock = $(document).find(`[id^="${baseId}"]`);
    if ($newFacetBlock.length > 0) {
      Drupal.attachBehaviors($newFacetBlock[0]);
    }

    // Keep the `checked` attribute in sync with the checkbox state.
    $(document).find(`[id^="${baseId}"] input[type="checkbox"]:not(:checked)`).removeAttr('checked');
    $(document).find(`[id^="${baseId}"] input[type="checkbox"]:checked`).attr('checked', 'checked');
  }

  /**
   * POSTs the current filter state to /facets-block-ajax and applies the
   * returned facet block replacements.
   *
   * @param {string} facetLink Path plus query describing the active filters.
   * @param {Object<string, string>} facetsBlocks Block id => selector map.
   * @param {Object} ajaxPageState drupalSettings.ajaxPageState (may be empty).
   */
  function refreshFacetBlocks(facetLink, facetsBlocks, ajaxPageState) {
    const postRequestData = {
      facet_link: facetLink,
      facets_blocks: facetsBlocks,
      _drupal_ajax: 1,
    };
    Object.entries(ajaxPageState).forEach(([key, value]) => {
      // $.param() URL-encodes values itself; encoding here as well would
      // double-encode them.
      postRequestData[`ajax_page_state[${key}]`] = value;
    });

    $.ajax({
      url: '/facets-block-ajax?_wrapper_format=drupal_ajax',
      type: 'POST',
      data: $.param(postRequestData),
      success(response) {
        if (Array.isArray(response)) {
          response.forEach(replaceFacetBlock);
        }
      },
      error(xhr) {
        console.error("POST Request Error:", xhr);
      },
    });
  }

  /**
   * Requests the view for the current filter state and the given page, inserts
   * the result through Drupal's AJAX command handling and, for page 0, also
   * refreshes the facet blocks.
   *
   * @param {number} selectedPage Zero-based page to load.
   * @return {?jqXHR} The GET request, or null when no request could be built.
   */
  function updateResults(selectedPage) {
    const selectedFacets = collectSelectedFacets();
    const itemsPerPage = $('#custom-items-per-page').val() || '';
    const query = buildFilterQuery(selectedFacets, itemsPerPage, selectedPage);

    // View information comes from the first facets_views_ajax entry.
    const facetsViews = drupalSettings.facets_views_ajax || {};
    const facetConfig = facetsViews[Object.keys(facetsViews)[0]];
    if (!facetConfig) {
      console.warn("No facet configuration found.");
      return null;
    }

    const viewName = facetConfig.view_id || "";
    const viewDisplayId = facetConfig.current_display_id || "";
    const viewBasePath = facetConfig.view_base_path ? `/${facetConfig.view_base_path}` : "";
    const viewPath = facetConfig.ajax_path || '/views/ajax';
    const pagerElement = 0;

    const viewDomId = findViewDomId(viewName, viewDisplayId);
    if (!viewDomId) {
      console.warn("No matching view_dom_id found.");
      return null;
    }

    const ajaxPageState = drupalSettings.ajaxPageState || {};
    const requestParts = [
      // Kept exactly as before (unencoded): the facet pairs after the first
      // '&' are parsed by the server as ordinary top-level parameters.
      `q=${viewBasePath}?${selectedFacets.join('&')}`,
      '_wrapper_format=drupal_ajax',
      `view_name=${encodeURIComponent(viewName)}`,
      `view_display_id=${encodeURIComponent(viewDisplayId)}`,
      'view_args=',
      `view_path=${encodeURIComponent(viewBasePath)}`,
      `view_base_path=${encodeURIComponent(viewBasePath)}`,
      `view_dom_id=${encodeURIComponent(viewDomId)}`,
      `pager_element=${pagerElement}`,
      query,
      '_drupal_ajax=1',
      ...Object.entries(ajaxPageState).map(
        ([key, value]) => `ajax_page_state[${key}]=${encodeURIComponent(value)}`
      ),
    ];
    const getRequestUrl = `${viewPath}?${requestParts.join('&')}`;

    // Collected before the request so it reflects the blocks the user acted on.
    const facetsBlocks = collectFacetBlocks();

    return $.ajax({
      url: getRequestUrl,
      type: 'GET',
      success(response) {
        if (!Array.isArray(response)) {
          return;
        }
        patchViewResponse(response, viewDomId, itemsPerPage);

        // Let Drupal execute the (patched) command list.
        const ajaxInstance = new Drupal.Ajax(false, false, {url: getRequestUrl});
        Drupal.Ajax.prototype.success.call(ajaxInstance, response);

        // Facet counts only change with the filters, not with the page.
        if (selectedPage === 0) {
          refreshFacetBlocks(`${viewBasePath}?${query}`, facetsBlocks, ajaxPageState);
        }
      },
      error(xhr) {
        console.error("GET Request Error:", xhr);
      },
    });
  }

  Drupal.behaviors.facetsAjaxBehavior = {
    /**
     * Binds the delegated facet and pager handlers once, on the initial
     * document-level attach.
     *
     * @param {HTMLDocument|HTMLElement} context Element behaviors are attached to.
     * @param {Object} settings Unused; drupalSettings is read at event time.
     */
    attach: function (context, settings) {
      if (context !== document || handlersBound) {
        return;
      }
      handlersBound = true;

      // Any filter/sort/search change reloads the results from the first page.
      $(document).on('change keyup', CONTROL_SELECTOR, function (e) {
        if (e.type === 'keyup' && e.key !== 'Enter' && e.keyCode !== 13) {
          return;
        }
        updateResults(0);
      });

      // Pager controls carry their zero-based target page in data-page.
      $(document).on('click', '.js-pager-link', function (e) {
        e.preventDefault();
        if (pagerRequestPending) {
          return;
        }
        const requestedPage = Number.parseInt($(this).data('page'), 10);
        const request = updateResults(requestedPage > 0 ? requestedPage : 0);
        if (request) {
          pagerRequestPending = true;
          request.always(() => {
            pagerRequestPending = false;
          });
        }
      });
    }
  };
})(jQuery, Drupal, drupalSettings);
layout/search/facets-item-list--checkbox.html.twig
{#
/**
 * @file
 * Theme override for a facets item list rendered as checkboxes without links.
 *
 * Each item is printed as an <input type="checkbox" class="facets-checkbox">
 * plus a <label>, not as an anchor. The markup exposes the hooks the
 * accompanying script reads:
 * - .facet-header[data-filter-id]: the facet machine name (facet.id).
 * - .facets-checkbox[data-facet-value]: the raw value of the facet item.
 * A text search box is printed above the list when it has more than 20 items.
 *
 * Available variables:
 * - items: A list of items. Each item contains:
 * - attributes: HTML attributes to be applied to each list item.
 * - value: The content of the list element. This template reads
 *   value['#title'] ('#value', '#count', '#is_active') and
 *   value['#attributes'] (the data-drupal-facet-item-* attributes).
 * - title: The title of the list.
 * - list_type: The tag for list element ("ul" or "ol").
 * - wrapper_attributes: HTML attributes to be applied to the list wrapper.
 * - attributes: HTML attributes to be applied to the list.
 * - empty: A message to display when there are no items. Allowed value is a
 * string or render array.
 * - context: A list of contextual data associated with the list. May contain:
 * - list_style: The ID of the widget plugin this facet uses.
 * - facet: The facet for this result item.
 * - id: the machine name for the facet.
 * - label: The facet label.
 * - widget.type: The widget type; used for the wrapper and list classes.
 *
 * @see facets_preprocess_facets_item_list()
 *
 * @ingroup themeable
 */
#}
{% block facet_header %}
<div class="facet-header p-3 filter-expand d-flex justify-content-between align-items-center" data-expand="true"
data-filter-id="{{ facet.id }}">
{%- if title is not empty -%}
<h3 class="mb-0">{{ title }}</h3>
<i class="fal fa-minus fs-20"></i>
{%- endif -%}
</div>
{% endblock facet_header %}
<div class="content">
{% if items|length > 20 %}
<div class="facet-search p-3">
<input type="text" class="filter-search w-100 fs-14 form-control" placeholder="Text search"
aria-label="Search" data-filter-content="filter-list">
</div>
{% endif %}
<div class="filter-list p-3 list-style-none">
<div class="facets-widget-{{- facet.widget.type -}}">
{% if facet.widget.type %}
{%- set attributes = attributes.addClass('item-list__' ~ facet.widget.type) %}
{% endif %}
{% if items or empty %}
{%- if items -%}
<{{ list_type }}{{ attributes }}>
{%- for item in items -%}
{# Display data (label text, result count, active state) from item.value['#title']. #}
{%- set facet_data = item.value['#title'] -%}
{%- set value = facet_data['#value'] -%}
{%- set count = facet_data['#count'] -%}
{%- set is_active = facet_data['#is_active'] -%}
{# Checkbox data (id, raw value, count) from item.value['#attributes']. #}
{%- set item_attrs = item.value['#attributes'] -%}
{%- set item_id = item_attrs['data-drupal-facet-item-id'] -%}
{%- set item_value = item_attrs['data-drupal-facet-item-value'] -%}
{%- set item_count = item_attrs['data-drupal-facet-item-count'] -%}
<li{{ item.attributes }}>
<input type="checkbox"
class="facets-checkbox"
id="{{ item_id }}"
data-facet-value="{{ item_value }}"
data-facet-count="{{ item_count }}"
{% if is_active %}checked{% endif %}>
<label for="{{ item_id }}">
<span class="facet-item__value">{{ value }}</span>
<span class="facet-item__count">({{ count }})</span>
</label>
</li>
{%- endfor -%}
</{{ list_type }}>
{%- else -%}
{{- empty -}}
{%- endif -%}
{%- endif %}
{% if facet.widget.type == "dropdown" %}
<label id="facet_{{ facet.id }}_label">{{ 'Facet'|t }} {{ facet.label }}</label>
{%- endif %}
</div>
</div>
</div>
navigation/pager.html.twig
{#
/**
 * @file
 * Pager rendered without links.
 *
 * Every control is a <div class="js-pager-link"> whose data-page attribute
 * holds the zero-based target page; nothing is rendered as an anchor.
 *
 * Variables used:
 * - items: Pager items (first, previous, pages, next, last). Only
 *   items.last.href is read, to extract the last page number.
 * - current: Number of the current page. The template treats it as one-based
 *   (page keys are printed as-is and converted with "key - 1").
 * - ellipses: Whether there are more pages before/after the listed ones.
 * - heading_id: Id of the visually hidden pager heading.
 */
#}
{% if items %}
  {# The nav is labelled by the hidden heading below, referenced by its id. #}
  <nav aria-labelledby="{{ heading_id }}">
    <h4 id="{{ heading_id }}" class="sr-only">{{ 'Pagination'|t }}</h4>
    <ul class="pagination js-pager__items d-flex justify-content-center align-items-center mt-3 mb-3">
      {# First page button #}
      {% if items.first %}
        <li class="pager-item pager__item--first">
          <div data-page="0"
               class="page-link d-flex border-0 justify-content-center align-items-center js-pager-link">
            <i class="fas fa-caret-left"></i><i class="fas fa-caret-left"></i>
          </div>
        </li>
      {% endif %}
      {# Previous button: one-based "current" minus one page, minus one for the zero-based index. #}
      {% if items.previous %}
        {% set prev_page = current - 2 %}
        <li class="pager-item pager__item--previous">
          <div data-page="{{ prev_page }}"
               class="page-link d-flex border-0 justify-content-center align-items-center js-pager-link">
            <i class="fas fa-caret-left"></i>
          </div>
        </li>
      {% endif %}
      {# Ellipsis for previous pages #}
      {% if ellipses.previous %}
        <li class="page-item" role="presentation"><span class="page-link border-0">…</span></li>
      {% endif %}
      {# Page number buttons #}
      {% for key, item in items.pages %}
        <li class="page-item {{ current == key ? 'active' : '' }}">
          {% if current == key %}
            <span class="page-link border-0">
              {{- key -}}
            </span>
          {% else %}
            <div data-page="{{ key - 1 }}" class="page-link border-0 js-pager-link">
              {{- key -}}
            </div>
          {% endif %}
        </li>
      {% endfor %}
      {# Ellipsis for next pages #}
      {% if ellipses.next %}
        <li class="page-item" role="presentation"><span class="page-link border-0">…</span></li>
      {% endif %}
      {# Next button: the one-based "current" equals the zero-based index of the next page. #}
      {% if items.next %}
        {% set next_page = current %}
        <li class="pager-item pager__item--next">
          <div data-page="{{ next_page }}"
               class="page-link d-flex border-0 justify-content-center align-items-center js-pager-link">
            <i class="fas fa-caret-right"></i>
          </div>
        </li>
      {% endif %}
      {# Last page button #}
      {% if items.last %}
        {% set last_url = items.last.href %}
        {# Take the query string (everything after '?') of the last-page URL. #}
        {% set page_param = last_url|split('?')|last %}
        {# Stays 0 when the URL has no "page" parameter. #}
        {% set page_number = 0 %}
        {# Scan the query string for the "page" parameter. #}
        {% for param in page_param|split('&') %}
          {% if param|split('=')|first == 'page' %}
            {% set page_number = param|split('=')|last %}
          {% endif %}
        {% endfor %}
        <li class="pager-item pager__item--last">
          {# Print the parsed page number; the previously printed "last_page" was never defined, so this was always 0. #}
          <div data-page="{{ page_number }}"
               class="page-link border-0 d-flex justify-content-center align-items-center js-pager-link">
            <i class="fas fa-caret-right"></i><i class="fas fa-caret-right"></i>
          </div>
        </li>
      {% endif %}
    </ul>
  </nav>
{% endif %}
Hi. I have seen all of this and we have it implemented. The suggested robots.txt is not correct, as it does not work properly when multiple filters/facets are selected. It should be:
User-agent: *
Disallow: *?*=*
Disallow: *?*=*&*=*
Disallow: *?*=*=*
Here are some other links if you are interested:
We have seen a drop in crawlers on facets, but not significant enough to justify spending $200 per domain, as we cannot transfer management to Cloudflare.
We will be testing a new facet implementation where there are no links on search pages and filtering is done with custom logic via AJAX. We will report on the outcome of that.
Hi. We've been having the same issues. We tried to minimize the impact by putting the websites behind CloudFront and enabling AWS WAF rules. We had to create custom rules, for facets only, that block users when they make too many calls to facets. It is not a fix, but it helps a bit.
Hello,
Any news on this? We are using D10.2 with PHP 8.1, and saving a menu or changing any admin settings does not clear memcached.
Thanks