diff --git a/.gitignore b/.gitignore index 1e08bdffb..798150e74 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ public/assets public/index.html public/panel/**/* public/uploads/**/* -solr +solr/data tmp/**/* uploads/**/* @@ -20,4 +20,4 @@ uploads/**/* *.lck .sass-cache/* -*.supported \ No newline at end of file +*.supported diff --git a/solr/conf/admin-extra.html b/solr/conf/admin-extra.html new file mode 100644 index 000000000..aa739da86 --- /dev/null +++ b/solr/conf/admin-extra.html @@ -0,0 +1,31 @@ + + + diff --git a/solr/conf/elevate.xml b/solr/conf/elevate.xml new file mode 100644 index 000000000..7630ebe20 --- /dev/null +++ b/solr/conf/elevate.xml @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + diff --git a/solr/conf/mapping-ISOLatin1Accent.txt b/solr/conf/mapping-ISOLatin1Accent.txt new file mode 100644 index 000000000..ede774258 --- /dev/null +++ b/solr/conf/mapping-ISOLatin1Accent.txt @@ -0,0 +1,246 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Syntax: +# "source" => "target" +# "source".length() > 0 (source cannot be empty.) +# "target".length() >= 0 (target can be empty.) + +# example: +# "À" => "A" +# "\u00C0" => "A" +# "\u00C0" => "\u0041" +# "ß" => "ss" +# "\t" => " " +# "\n" => "" + +# À => A +"\u00C0" => "A" + +# Á => A +"\u00C1" => "A" + +#  => A +"\u00C2" => "A" + +# à => A +"\u00C3" => "A" + +# Ä => A +"\u00C4" => "A" + +# Å => A +"\u00C5" => "A" + +# Æ => AE +"\u00C6" => "AE" + +# Ç => C +"\u00C7" => "C" + +# È => E +"\u00C8" => "E" + +# É => E +"\u00C9" => "E" + +# Ê => E +"\u00CA" => "E" + +# Ë => E +"\u00CB" => "E" + +# Ì => I +"\u00CC" => "I" + +# Í => I +"\u00CD" => "I" + +# Î => I +"\u00CE" => "I" + +# Ï => I +"\u00CF" => "I" + +# IJ => IJ +"\u0132" => "IJ" + +# Ð => D +"\u00D0" => "D" + +# Ñ => N +"\u00D1" => "N" + +# Ò => O +"\u00D2" => "O" + +# Ó => O +"\u00D3" => "O" + +# Ô => O +"\u00D4" => "O" + +# Õ => O +"\u00D5" => "O" + +# Ö => O +"\u00D6" => "O" + +# Ø => O +"\u00D8" => "O" + +# Œ => OE +"\u0152" => "OE" + +# Þ +"\u00DE" => "TH" + +# Ù => U +"\u00D9" => "U" + +# Ú => U +"\u00DA" => "U" + +# Û => U +"\u00DB" => "U" + +# Ü => U +"\u00DC" => "U" + +# Ý => Y +"\u00DD" => "Y" + +# Ÿ => Y +"\u0178" => "Y" + +# à => a +"\u00E0" => "a" + +# á => a +"\u00E1" => "a" + +# â => a +"\u00E2" => "a" + +# ã => a +"\u00E3" => "a" + +# ä => a +"\u00E4" => "a" + +# å => a +"\u00E5" => "a" + +# æ => ae +"\u00E6" => "ae" + +# ç => c +"\u00E7" => "c" + +# è => e +"\u00E8" => "e" + +# é => e +"\u00E9" => "e" + +# ê => e +"\u00EA" => "e" + +# ë => e +"\u00EB" => "e" + +# ì => i +"\u00EC" => "i" + +# í => i +"\u00ED" => "i" + +# î => i +"\u00EE" => "i" + +# ï => i +"\u00EF" => "i" + +# ij => ij +"\u0133" => "ij" + +# ð => d +"\u00F0" => "d" + +# ñ => n +"\u00F1" => "n" + +# ò => o +"\u00F2" => "o" + +# ó => o +"\u00F3" => "o" + +# ô => o +"\u00F4" => "o" + +# õ => o +"\u00F5" => "o" + +# ö => o +"\u00F6" => "o" + +# ø => o +"\u00F8" => "o" + +# œ => oe +"\u0153" => "oe" + +# ß => ss +"\u00DF" => "ss" + +# þ => th +"\u00FE" => "th" + +# ù => u +"\u00F9" => "u" + +# ú => u +"\u00FA" => "u" + +# û => u +"\u00FB" => "u" + +# ü => u +"\u00FC" => "u" + +# ý => y +"\u00FD" => "y" + +# ÿ => y +"\u00FF" => "y" + +# ff => ff +"\uFB00" => "ff" + +# fi => fi +"\uFB01" => "fi" + +# fl => fl +"\uFB02" => "fl" + +# ffi => ffi +"\uFB03" => "ffi" + +# ffl => ffl +"\uFB04" => "ffl" + +# ſt => ft +"\uFB05" => "ft" + +# st => st +"\uFB06" => "st" diff --git a/solr/conf/protwords.txt b/solr/conf/protwords.txt new file mode 100644 index 000000000..1dfc0abec --- /dev/null +++ b/solr/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/conf/schema.xml b/solr/conf/schema.xml new file mode 100644 index 000000000..17a751ad0 --- /dev/null +++ b/solr/conf/schema.xml @@ -0,0 +1,238 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + text + + + + diff --git a/solr/conf/scripts.conf b/solr/conf/scripts.conf new file mode 100644 index 000000000..f58b262ae --- /dev/null +++ b/solr/conf/scripts.conf @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +user= +solr_hostname=localhost +solr_port=8983 +rsyncd_port=18983 +data_dir= +webapp_name=solr +master_host= +master_data_dir= +master_status_dir= diff --git a/solr/conf/solrconfig.xml b/solr/conf/solrconfig.xml new file mode 100644 index 000000000..65c196073 --- /dev/null +++ b/solr/conf/solrconfig.xml @@ -0,0 +1,934 @@ + + + + + + ${solr.abortOnConfigurationError:true} + + + + + + + + + + + + + ${solr.data.dir:./solr/data} + + + + false + 10 + + + + 32 + + 10000 + 1000 + 10000 + + + + + + + + native + + + + + + false + 32 + 10 + + + + + + false + + true + + + + + + 1 + + 0 + + + + false + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + true + + + + + 20 + + 200 + + + + + + + + + + + + solr rocks + 0 + 10 + + + static firstSearcher warming query from solrconfig.xml + + + + + false + + 2 + + + + + + + + + + + + + + + + + + explicit + + + + + + + + + + + dismax + explicit + 0.01 + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + + text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9 + + + popularity^0.5 recip(price,1,1000,1000)^0.3 + + + id,name,price,score + + + 2<-1 5<-2 6<90% + + 100 + *:* + + text features name + + 0 + + name + regex + + + + + + + dismax + explicit + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 + 2<-1 5<-2 6<90% + + incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2 + + + + inStock:true + + + + cat + manu_exact + price:[* TO 500] + price:[500 TO *] + + + + + + textSpell + + default + name + ./spellchecker + + + + + + + + + false + + false + + 1 + + + spellcheck + + + + + + + true + + + tvComponent + + + + + + + + default + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + 20 + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + true + default + true + + name + id + + features + + true + + + + false + + + clusteringComponent + + + + + + + text + true + ignored_ + + true + links + ignored_ + + + + + + + true + + + termsComponent + + + + + + string + elevate.xml + + + + + explicit + + + elevator + + + + + + + + + + + + + + + + + + standard + solrpingquery + all + + + + + + explicit + + true + + + + + + + + 100 + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + ]]> + ]]> + + + + + + + + + 5 + + + + + + solr + + + + + 1 + 2 + + + diff --git a/solr/conf/spellings.txt b/solr/conf/spellings.txt new file mode 100644 index 000000000..d7ede6f56 --- /dev/null +++ b/solr/conf/spellings.txt @@ -0,0 +1,2 @@ +pizza +history \ No newline at end of file diff --git a/solr/conf/stopwords.txt b/solr/conf/stopwords.txt new file mode 100644 index 000000000..b5824da32 --- /dev/null +++ b/solr/conf/stopwords.txt @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +#Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +s +such +t +that +the +their +then +there +these +they +this +to +was +will +with + diff --git a/solr/conf/synonyms.txt b/solr/conf/synonyms.txt new file mode 100644 index 000000000..b0e31cb7e --- /dev/null +++ b/solr/conf/synonyms.txt @@ -0,0 +1,31 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaa => aaaa +bbb => bbbb1 bbbb2 +ccc => cccc1,cccc2 +a\=>a => b\=>b +a\,a => b\,b +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma +