Magento Forum

   
Page 1 of 2
SphinxSearch on Magento - Quick Guide
 
dtcuk
Member
 
Total Posts:  55
Joined:  2009-02-28
 

If you ever wanted to intergrate a better search engine into Magento, then SphinxSearch seams to be the easiest.

We have a windows based machine, and getting SphinxSearch to work on it is really easy, just go to the website, download the windows build and follow the instruction.

Get used to learning Sphinx before you use it with Magento, it took me a couple of days, but it is really flexable, very very quick and can use morphology i.e.  searching for ‘Spades’ also retruns ‘spade’.

There are other settings as well so worth taking a look.

You need to create an index of the fulltext table, this means that it still uses your search options from magento.

index the table - we used the following settings

source search
{
  type          
mysql
  sql_host      
localhost
  sql_user      
root
  sql_pass      
= ********
  
sql_db        = *********
  
sql_query_pre SET NAMES utf8
  sql_query     
SELECT product_iddata_index FROM catalogsearch_fulltext
}

and the index

index search
{    

  source       
search
  path         
= ***** (path to the shpinx index on your server)
  
  
min_prefix_len 3
  enable_star 
1
  morphology 
stem_en
}

obviously this is at its simplest settings.
You can index as many stores as you want by changing the sql_query ‘WHERE’ to the correct store.

The only Magent code that needs changing is Mage_CatalogSearch_Model_Mysql4_Fulltext

At the top you need to add:

include(' *****[path to your sphinx php api on your server]  ');

then find the function

public function prepareResult($object$queryText$query)

and change the whole function to:

public function prepareResult($object$queryText$query)
    
{
        
if (!$query->getIsProcessed()) {
            $searchType 
$object->getSearchType($query->getStoreId());

            
$stringHelper Mage::helper('core/string');
            
/* @var $stringHelper Mage_Core_Helper_String */

            
$bind = array(
                
':query' => $queryText
            
);
            
$like = array();

            
$fulltextCond   '';
            
$likeCond       '';
            
$separateCond   '';

            if (
$searchType == Mage_CatalogSearch_Model_Fulltext::SEARCH_TYPE_LIKE
                
|| $searchType == Mage_CatalogSearch_Model_Fulltext::SEARCH_TYPE_COMBINE{
                $words 
$stringHelper->splitWords($queryTexttrue$query->getMaxQueryWords());
                
$likeI 0;
                foreach (
$words as $word{
                    $like[] 
'`s`.`data_index` LIKE :likew' $likeI;
                    
$bind[':likew' $likeI] '%' $word '%';
                    
$likeI ++;
                
}
                
if ($like{
                    $likeCond 
'(' join(' OR '$like) . ')';
                
}
            }
            
if ($searchType == Mage_CatalogSearch_Model_Fulltext::SEARCH_TYPE_FULLTEXT
                
|| $searchType == Mage_CatalogSearch_Model_Fulltext::SEARCH_TYPE_COMBINE{
                $fulltextCond 
'MATCH (`s`.`data_index`) AGAINST (:query IN BOOLEAN MODE)';
            
}
            
if ($searchType == Mage_CatalogSearch_Model_Fulltext::SEARCH_TYPE_COMBINE && $likeCond{
                $separateCond 
' OR ';
            
}
            
//weight devided by 1000
            
$cl = new SphinxClient();          
            
$cl->SetServer"localhost"9312 );
            
$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
            
$cl->AddQuery $queryText"search_piu" );
            
$results $cl->RunQueries();
            foreach(
$results as $result){
                
if ( ! empty($result["matches"]) ) {
                    
foreach ( $result["matches"as $doc => $docinfo {
                        $sql
sprintf("INSERT INTO `priceitupnew`.`catalogsearch_result`"
                            
." (`query_id`, `product_id`, `relevance`) VALUES "
                            
"(%d, %d, %d)"
                            
" ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)",
                        
$query->getId(),
                        
$doc,
                        
$docinfo['weight']/1000                
                        
);
                        
$this->_getWriteAdapter()->query($sql$bind);
                    
}
                }
            }
            
/*$sql = sprintf("INSERT INTO `{$this->getTable('catalogsearch/result')}` "
                . "(SELECT STRAIGHT_JOIN '%d', `s`.`product_id`, MATCH (`s`.`data_index`) "
                . "AGAINST (:query IN BOOLEAN MODE) FROM `{$this->getMainTable()}` AS `s` "
                . "INNER JOIN `{$this->getTable('catalog/product')}` AS `e` "
                . "ON `e`.`entity_id`=`s`.`product_id` WHERE (%s%s%s) AND `s`.`store_id`='%d')"
                . " ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)",
                $query->getId(),
                $fulltextCond,
                $separateCond,
                $likeCond,
                $query->getStoreId()
            );*/

            

            
$query->setIsProcessed(1);
        
}

        
return $this;
    
}

very basic, surprisingly.

Remember this code needs to be added to extension, or local folder in order to protect from upgrades, however haven’t got time to go into this.

Hope this helps someone.

 
Magento Community Magento Community
Magento Community
Magento Community
 
dtcuk
Member
 
Total Posts:  55
Joined:  2009-02-28
 

Quick update,

If you want to test SphinxSearch before changing any Magento code then first get SphinxSearch running on your machine, and get the index working.

You can then test the results by creating a search.php file in your base directory and use the following code

<?php
error_reporting
(E_ALL);
include(
'/sphinx/api/sphinxapi.php  [full path to your sphinx api on the server]  ');
require_once 
'app/Mage.php';
    
$queryText $_GET['q'];
    
$cl = new SphinxClient();          
    
$cl->SetServer"localhost"9312 );
    
$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
    
$cl->AddQuery $queryText"search" );
    
$results $cl->RunQueries();
    
print_r($results);
                
?>

then test

http://www.yoursite.com/search.php?q=yourquery

 
Magento Community Magento Community
Magento Community
Magento Community
 
phiber9
Member
 
Avatar
Total Posts:  41
Joined:  2010-05-26
Croatia
 

Nice guide!

I’ve implemented sphinx in a similar way however there are way less search results than magento standard search.

Any ideas on tweaking magento+sphinx to understand something like “blue copacabana shirt size M” if the product information is:

title: copacabana shirt
size attribute: M (used in quick search, used in advanced searched set to yes)
color attribute: blue (used in quick search, used in advanced searched set to yes)

 
Magento Community Magento Community
Magento Community
Magento Community
 
dtcuk
Member
 
Total Posts:  55
Joined:  2009-02-28
 

Hi, Sphinx should pick those details up, maybe try changing some Sphinx settings.

However couple of up updates to the script to add more and relevant results, mayby try adding the following will help (helped me)

$cl->SetLimits(0250);
Set the amount of results \\\"0 = first result, 250 last result, same as mysql LIMIT. currently defaults to 20 results in Magento.
Also setting the mysql insert for weight to string not decimal, as this doesn\\\’t calculate the relevance field in Magento. so:

$cl = new SphinxClient();          
$cl->SetServer"localhost"9312 );
$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
$cl->SetLimits(0250);
$cl->AddQuery $queryText"search_piu" );
$results $cl->RunQueries();
print_r($results);
foreach(
$results as $result){
    
if ( ! empty($result["matches"]) ) {
        
foreach ( $result["matches"as $doc => $docinfo {
            $sql
sprintf("INSERT INTO catalogsearch_result"
                
." (`query_id`, `product_id`, `relevance`) VALUES "
                
"(%d, %d, %s)"
                
" ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)",
            
1,
            
$doc,
            
$docinfo['weight']/1000                
            
);
            
$this->_getWriteAdapter()->query($sql);
        
}
    }
}

 
Magento Community Magento Community
Magento Community
Magento Community
 
phiber9
Member
 
Avatar
Total Posts:  41
Joined:  2010-05-26
Croatia
 

On Magento sites the following match mode would be better:

$cl->SetMatchMode(SPH_MATCH_EXTENDED2);

 
Magento Community Magento Community
Magento Community
Magento Community
 
mcm999
Member
 
Total Posts:  42
Joined:  2009-05-06
 

Hi,

I was wondering if you know how to not insert the cached search queries into the catalogsearch_result table?

Every time I do a search it does thousands of inserts to this table which is slowing things down. Do you know how I can avoid this?

I can obviously pay you if you are able to help me. I have also PM\’d you as im not sure if you get the email notifications?

Thanks

Mike

 
Magento Community Magento Community
Magento Community
Magento Community
 
dtcuk
Member
 
Total Posts:  55
Joined:  2009-02-28
 

Couple of updates

add

$cl->SetSortMode SPH_SORT_RELEVANCE  );
before
$cl->SetLimits
and
$queryText str_replace(' ','|',$queryText);
before
$cl->AddQuery

dtcuk - 05 March 2011 01:10 AM

Hi, Sphinx should pick those details up, maybe try changing some Sphinx settings.

However couple of up updates to the script to add more and relevant results, mayby try adding the following will help (helped me)

$cl->SetLimits(0250);
Set the amount of results \\\"0 = first result, 250 last result, same as mysql LIMIT. currently defaults to 20 results in Magento.
Also setting the mysql insert for weight to string not decimal, as this doesn\\\’t calculate the relevance field in Magento. so:

$cl = new SphinxClient();          
$cl->SetServer"localhost"9312 );
$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
$cl->SetLimits(0250);
$cl->AddQuery $queryText"search_piu" );
$results $cl->RunQueries();
print_r($results);
foreach(
$results as $result){
    
if ( ! empty($result["matches"]) ) {
        
foreach ( $result["matches"as $doc => $docinfo {
            $sql
sprintf("INSERT INTO catalogsearch_result"
                
." (`query_id`, `product_id`, `relevance`) VALUES "
                
"(%d, %d, %s)"
                
" ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)",
            
1,
            
$doc,
            
$docinfo['weight']/1000                
            
);
            
$this->_getWriteAdapter()->query($sql);
        
}
    }
}

 
Magento Community Magento Community
Magento Community
Magento Community
 
webtrade
Jr. Member
 
Total Posts:  10
Joined:  2011-05-23
 

Hi

I have implemented the above code, but can’t get it to return results. When I do a search I get results like the following

Home /
Search results for: ‘crocodiles’

Array ( [0] => Array ( [error] => [warning] => [status] => 0 [fields] => Array ( [0] => data_index ) [attrs] => Array ( ) [matches] => Array ( [9115] => Array ( [weight] => 1707 [attrs] => Array ( ) ) [70489] => Array ( [weight] => 1707 [attrs] => Array ( ) ) [106933] => Array ( [weight] => 1707 [attrs] => Array ( ) ) ) [total] => 3 [total_found] => 3 [time] => 0.000 [words] => Array ( [crocodil] => Array ( [docs] => 3 [hits] => 3 ) ) ) )
Search results for ‘crocodiles’

Your search returns no results.

I have left the line print_r($results); in the ‘prepareResult’ function for debugging. Sphinx is running and tests fine from the command line.

Can anybody help with what I’m doing wrong?

 
Magento Community Magento Community
Magento Community
Magento Community
 
dtcuk
Member
 
Total Posts:  55
Joined:  2009-02-28
 

It looks like it is returning results ([total] => 3 [total_found] => 3)

You need to loop through the result matches to get the usable information. Try print_r the sql loop above.

i.e.

foreach($results as $result){
    
if ( ! empty($result["matches"]) ) {
        
foreach ( $result["matches"as $doc => $docinfo {
            $sql
sprintf("INSERT INTO catalogsearch_result"
                
." (`query_id`, `product_id`, `relevance`) VALUES "
                
"(%d, %d, %s)"
                
" ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)",
            
1,
            
$doc,
            
$docinfo['weight']/1000                
            
);
            
$this->_getWriteAdapter()->query($sql);
        
}
    }
}

 
Magento Community Magento Community
Magento Community
Magento Community
 
webtrade
Jr. Member
 
Total Posts:  10
Joined:  2011-05-23
 

Thanks for the quick reply.

$cl = new SphinxClient();
$cl->SetServer( “localhost”, 9313 );
$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
$cl->SetSortMode ( SPH_SORT_RELEVANCE );
$cl->SetLimits(0, 250);
$queryText = str_replace(’ ‘,’|’,$queryText);
$cl->AddQuery ( $queryText, “eurospan” );
$results = $cl->RunQueries();
print_r($results);
foreach($results as $result){
if ( ! empty($result["matches"]) ) {
foreach ( $result["matches"] as $doc => $docinfo ) {
$sql= sprintf("INSERT INTO catalogsearch_result”
.” (`query_id`, `product_id`, `relevance`) VALUES “
. “(%d, %d, %s)”
. “ ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)”,
1,
$doc,
$docinfo[’weight’]/1000
);
print_r($sql);
// $this->_getWriteAdapter()->query($sql);
}
}
}

is now producing the following:

Array ( [0] => Array ( [error] => [warning] => [status] => 0 [fields] => Array ( [0] => data_index ) [attrs] => Array ( ) [matches] => Array ( [9115] => Array ( [weight] => 1707 [attrs] => Array ( ) ) [70489] => Array ( [weight] => 1707 [attrs] => Array ( ) ) [106933] => Array ( [weight] => 1707 [attrs] => Array ( ) ) ) [total] => 3 [total_found] => 3 [time] => 0.000 [words] => Array ( [crocodil] => Array ( [docs] => 3 [hits] => 3 ) ) ) ) INSERT INTO catalogsearch_result (`query_id`, `product_id`, `relevance`) VALUES (1, 9115, 1.707) ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)INSERT INTO catalogsearch_result (`query_id`, `product_id`, `relevance`) VALUES (1, 70489, 1.707) ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)INSERT INTO catalogsearch_result (`query_id`, `product_id`, `relevance`) VALUES (1, 106933, 1.707) ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)

Presumably, each insert statement should read as follows..?

INSERT INTO catalogsearch_result (`query_id`, `product_id`, `relevance`) VALUES (9115) ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)

 
Magento Community Magento Community
Magento Community
Magento Community
 
dtcuk
Member
 
Total Posts:  55
Joined:  2009-02-28
 

The results are returning the correct details by the look of it, plus the sql statement looks fine.

The sql is one long statement so that it doesn’t do to many inserts/updates.

If you look at the original sql statement you got

INSERT INTO catalogsearch_result (`query_id`, `product_id`, `relevance`)
VALUES (191151.707ON DUPLICATE KEY UPDATE
`relevance`=VALUES(`relevance`)INSERT INTO catalogsearch_result (`query_id`,
`
product_id`, `relevance`) VALUES (1704891.707ON DUPLICATE KEY UPDATE
`relevance`=VALUES(`relevance`)INSERT INTO catalogsearch_result (`query_id`,
`
product_id`, `relevance`) VALUES (11069331.707ON DUPLICATE KEY UPDATE
`relevance`=VALUES(`relevance`)

its correct other than each insert needs a ; after it so it should read

INSERT INTO catalogsearch_result (`query_id`, `product_id`, `relevance`)
VALUES (191151.707ON DUPLICATE KEY UPDATE
`relevance`=VALUES(`relevance`);INSERT INTO catalogsearch_result (`query_id`,
`
product_id`, `relevance`) VALUES (1704891.707ON DUPLICATE KEY UPDATE
`relevance`=VALUES(`relevance`);INSERT INTO catalogsearch_result (`query_id`,
`
product_id`, `relevance`) VALUES (11069331.707ON DUPLICATE KEY UPDATE
`relevance`=VALUES(`relevance`)

To make sure this happens just change

" ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)",

to

" ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`);",

as i say the sql looks fine, have you tried running the sql manually to make sure their are no errors.

ie run

INSERT INTO catalogsearch_result (`query_id`, `product_id`, `relevance`)
VALUES (191151.707ON DUPLICATE KEY UPDATE
`relevance`=VALUES(`relevance`);INSERT INTO catalogsearch_result (`query_id`,
`
product_id`, `relevance`) VALUES (1704891.707ON DUPLICATE KEY UPDATE
`relevance`=VALUES(`relevance`);INSERT INTO catalogsearch_result (`query_id`,
`
product_id`, `relevance`) VALUES (11069331.707ON DUPLICATE KEY UPDATE
`relevance`=VALUES(`relevance`)

through myphpadmin or what ever you use.

At least then it will show that the new search results have been inserted.

If it works the other thing you might need to change is

if (!$query->getIsProcessed()) {

if you remove the if statement (and end if) it will update the database each time you do a search, otherwise nothing will change until the search cache is refreshed.

 
Magento Community Magento Community
Magento Community
Magento Community
 
webtrade
Jr. Member
 
Total Posts:  10
Joined:  2011-05-23
 

I tried what you suggested, thank you. The INSERT statements are working ok (even without the ; between). Rows are being added to the catalogsearch_result table. Although every query_id = 1 (not sure if that is a problem)

I tried commenting out
if (!$query->getIsProcessed()) {
and also the endif

Still getting the message ‘Your search returns no results’. I’m baffled could it be that the query_id needs to be set? I noticed in the original function this line which appears to set the query_id field to match the auto increment catalogsearch_query.query_id field

$fields = array(
‘query_id’ => new Zend_Db_Expr($query->getId()),
‘product_id’,
);

 
Magento Community Magento Community
Magento Community
Magento Community
 
dtcuk
Member
 
Total Posts:  55
Joined:  2009-02-28
 

Looking at my code, I’ve changed it since the original post, So I’ll post what i have now, which might help as i had the same problem as you, but now it works fine (remember to refresh your search cache.)

public function prepareResult($object, $queryText, $query)
{
$adapter = $this->_getWriteAdapter();
if (!$query->getIsProcessed()) {
$ids=false;
$sql= “INSERT INTO `{$this->getTable(’catalogsearch/result’)}` VALUES “;
$cl = new SphinxClient();
$cl->SetServer( “localhost”, 9312 );
//actual phrase matches
$cl->SetMatchMode(SPH_MATCH_PHRASE);
$cl->SetLimits(0, 400);
$q = str_replace(’+’,’|’,$queryText);
//search_piuphrase = your sphinx index
$cl->AddQuery ( $q, “search_piuphrase” );
$results = $cl->RunQueries();
foreach($results as $result){

if ( ! empty($result["matches"]) ) {
foreach ( $result["matches"] as $doc => $docinfo ) {
$ids[]=$doc;
}
}
}

//general matches
$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
$cl->SetSortMode ( SPH_SORT_RELEVANCE );
$cl->SetLimits(0, 400);
$q = str_replace(’+’,’|’,$queryText);
$cl->AddQuery ( $q, “search_piu” );
$results = $cl->RunQueries();
foreach($results as $result){
if ( ! empty($result["matches"]) ) {
foreach ( $result["matches"] as $doc => $docinfo ) {
$ids[]=$doc;
}
}
}

//extended matches
$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
$cl->SetSortMode ( SPH_SORT_RELEVANCE );
$cl->SetLimits(0, 200);
$q = str_replace(’+’,’|’,$queryText);
$cl->AddQuery ($q, “search_piusoundex” );
$results = $cl->RunQueries();
foreach($results as $result){
if ( ! empty($result["matches"]) ) {
foreach ( $result["matches"] as $doc => $docinfo ) {
$ids[]=$doc;
}
}
}

$cl->SetMatchMode(SPH_MATCH_ANY);
$cl->SetSortMode ( SPH_SORT_RELEVANCE );
$cl->SetLimits(0, 200);
$q = str_replace(’+’,’|’,$queryText);
$cl->AddQuery ( $q, “search_piusoundex” );
$results = $cl->RunQueries();
foreach($results as $result){
if ( ! empty($result["matches"]) ) {
foreach ( $result["matches"] as $doc => $docinfo ) {
$ids[]=$doc;
}
}
}
if($ids){
$i=count($ids);
foreach($ids as $id){
$sql.="(".$query->getId().", “.$id.”, “.$i."),";
$i = $i - 1;
}
$sql = substr($sql,0,-1);
$sql.=” ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)”;
$this->_getWriteAdapter()->query($sql);

}

$query->setIsProcessed(1);
}

return $this;
}

bare in mind this is unedited from my script which uses three indexes for different types of search results, you’ll have to adapt it to your own index types.

 
Magento Community Magento Community
Magento Community
Magento Community
 
webtrade
Jr. Member
 
Total Posts:  10
Joined:  2011-05-23
 

Thank you. Could you post an example of single index use please.

 
Magento Community Magento Community
Magento Community
Magento Community
 
dtcuk
Member
 
Total Posts:  55
Joined:  2009-02-28
 
dtcuk - 04 April 2012 04:08 AM

Looking at my code, I’ve changed it since the original post, So I’ll post what i have now, which might help as i had the same problem as you, but now it works fine (remember to refresh your search cache.)

public function prepareResult($object, $queryText, $query)
{
$adapter = $this->_getWriteAdapter();
if (!$query->getIsProcessed()) {
$ids=false;
$sql= “INSERT INTO `{$this->getTable(’catalogsearch/result’)}` VALUES “;
$cl = new SphinxClient();
$cl->SetServer( “localhost”, 9312 );
//actual phrase matches
$cl->SetMatchMode(SPH_MATCH_PHRASE);
$cl->SetLimits(0, 400);
$q = str_replace(’+’,’|’,$queryText);
//search_piuphrase = your sphinx index
$cl->AddQuery ( $q, “search_piuphrase” );
$results = $cl->RunQueries();
foreach($results as $result){

if ( ! empty($result["matches"]) ) {
foreach ( $result["matches"] as $doc => $docinfo ) {
$ids[]=$doc;
}
}
}

//general matches
$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
$cl->SetSortMode ( SPH_SORT_RELEVANCE );
$cl->SetLimits(0, 400);
$q = str_replace(’+’,’|’,$queryText);
$cl->AddQuery ( $q, “search_piu” );
$results = $cl->RunQueries();
foreach($results as $result){
if ( ! empty($result["matches"]) ) {
foreach ( $result["matches"] as $doc => $docinfo ) {
$ids[]=$doc;
}
}
}


if($ids){
$i=count($ids);
foreach($ids as $id){
$sql.="(".$query->getId().", “.$id.”, “.$i."),";
$i = $i - 1;
}
$sql = substr($sql,0,-1);
$sql.=” ON DUPLICATE KEY UPDATE `relevance`=VALUES(`relevance`)”;
$this->_getWriteAdapter()->query($sql);

}

$query->setIsProcessed(1);
}

return $this;
}

bare in mind this is unedited from my script which uses three indexes for different types of search results, you’ll have to adapt it to your own index types.

 
Magento Community Magento Community
Magento Community
Magento Community
 
webtrade
Jr. Member
 
Total Posts:  10
Joined:  2011-05-23
 

I think I have it working now, thank you so much. One last question..What it the best method to clear the search cache?

 
Magento Community Magento Community
Magento Community
Magento Community
Magento Community
Magento Community
    Back to top
Page 1 of 2