tmp commit for search. start nccu0621 first

This commit is contained in:
Matthew K. Fu JuYuan 2012-06-22 11:34:10 +08:00
parent cb17fe0825
commit 3f95fe4f13
20 changed files with 949 additions and 1792 deletions

View File

@ -37,9 +37,8 @@ gem 'therubyracer' if RUBY_PLATFORM.downcase.include?("linux")
gem "impressionist", :require => "impressionist", :path => "vendor/impressionist"
# gem "tire"
gem 'sunspot_rails', "~> 1.3.2"
gem 'sunspot_solr'
gem "tire"
# Gems used only for assets and not required
# in production environments by default.

View File

@ -96,6 +96,7 @@ GEM
railties (>= 3.0.0)
fastercsv (1.5.4)
haml (3.1.4)
hashr (0.0.21)
hike (1.2.1)
hoe (2.16.1)
rake (~> 0.8)
@ -185,6 +186,8 @@ GEM
redis (>= 2.0.1)
resque (>= 1.8.0)
rufus-scheduler
rest-client (1.6.7)
mime-types (>= 1.16)
rsolr (1.0.8)
builder (>= 2.1.2)
rspec (2.8.0)
@ -253,13 +256,18 @@ GEM
sunspot_rails (1.3.3)
nokogiri
sunspot (= 1.3.3)
sunspot_solr (1.3.3)
therubyracer (0.9.9)
libv8 (~> 3.3.10)
thor (0.14.6)
tilt (1.3.3)
tinymce-rails (3.4.8)
railties (>= 3.1)
tire (0.4.2)
activemodel (>= 3.0)
hashr (~> 0.0.19)
multi_json (~> 1.0)
rake
rest-client (~> 1.6)
transaction-simple (1.4.0)
hoe (>= 1.1.7)
treetop (1.4.10)
@ -327,9 +335,8 @@ DEPENDENCIES
spork
sprockets
sunspot-rails-tester
sunspot_rails (~> 1.3.2)
sunspot_solr
therubyracer
tinymce-rails
tire
uglifier
watchr

View File

@ -1,46 +0,0 @@
# coding: utf-8
# 基本 Model加入一些通用功能
# Common behaviour shared by the application's Mongoid documents: a few
# generic scopes, an ActiveRecord-style +find_by_id+, simple batch iteration
# and Sidekiq +delay+ proxies at both class and instance level.
module Mongoid
  module BaseModel
    extend ActiveSupport::Concern

    included do
      # Orders by _id, descending.
      scope :recent, desc(:_id)
      # Excludes documents whose _id appears in +ids+ (each id coerced via to_i).
      scope :exclude_ids, Proc.new { |ids| where(:_id.nin => ids.map(&:to_i)) }
      # Documents created during the last 7 days. The criteria is wrapped in a
      # Proc so the cutoff is evaluated on every call; passing the criteria
      # directly would compute "7 days ago" once, when the class is loaded.
      scope :by_week, Proc.new { where(:created_at.gte => 7.days.ago.utc) }
    end

    module ClassMethods
      # Like ActiveRecord's find_by_id.
      #
      # @param id [Integer, String] identifier; coerced with +to_i+
      # @return the first document matching +_id+, or nil when nothing matches
      #   or +id+ is neither an Integer nor a String
      def find_by_id(id)
        return nil unless id.is_a?(Integer) || id.is_a?(String)

        where(:_id => id.to_i).first
      end

      # Yields the collection to the block in consecutive limit/skip slices.
      #
      # @param opts [Hash] +:batch_size+ (default 1000) and +:start+ offset
      #   (default 0); the hash is only read, never modified
      # @yield [objects] the criteria for the current slice
      # @return [nil]
      def find_in_batches(opts = {})
        batch_size = opts[:batch_size] || 1000
        start = opts[:start].to_i # nil.to_i is 0, so no explicit default is needed
        objects = limit(batch_size).skip(start)
        while objects.any?
          yield objects
          start += batch_size
          # A short slice means the end of the collection was reached.
          break if objects.size < batch_size
          objects = limit(batch_size).skip(start)
        end
      end

      # Sidekiq proxy wrapping the class (uses DelayedDocument, defined elsewhere).
      def delay
        Sidekiq::Extensions::Proxy.new(DelayedDocument, self)
      end
    end

    # Sidekiq proxy wrapping this document instance.
    def delay
      Sidekiq::Extensions::Proxy.new(DelayedDocument, self)
    end
  end
end

View File

@ -1,46 +0,0 @@
# coding: utf-8
# this is from : https://github.com/jugyo/sunspot_mongoid
# this file is special for mongoid_auto_increment_id
require 'sunspot'
require 'mongoid'
require 'sunspot/rails'
# == Examples:
#
# class Post
# include Mongoid::Document
# field :title
#
# include Sunspot::Mongoid
# searchable do
# text :title
# end
# end
#
# Sunspot adapters for Mongoid documents. Including this module makes the
# class searchable and registers the instance/data adapters below for it.
# Both loaders coerce ids with +to_i+, matching the integer ids this file
# is written for (see the file header about mongoid_auto_increment_id).
module Sunspot
  module Mongoid
    # Hook run when the module is included in +base+.
    def self.included(base)
      base.class_eval do
        extend Sunspot::Rails::Searchable::ActsAsMethods
        Sunspot::Adapters::DataAccessor.register(DataAccessor, base)
        Sunspot::Adapters::InstanceAdapter.register(InstanceAdapter, base)
      end
    end

    # Exposes the wrapped document's id.
    class InstanceAdapter < Sunspot::Adapters::InstanceAdapter
      def id
        @instance.id
      end
    end

    # Loads documents back from the database by id.
    class DataAccessor < Sunspot::Adapters::DataAccessor
      # @param id identifier to look up; coerced with +to_i+ so it is
      #   queried the same way as in +load_all+
      # @return the first matching document, or nil
      def load(id)
        @clazz.where(:_id => id.to_i).first
      end

      # @param ids [Enumerable] identifiers; each coerced with +to_i+
      # @return the criteria matching any of the given ids
      def load_all(ids)
        @clazz.where(:_id.in => ids.map(&:to_i))
      end
    end
  end
end

View File

@ -5,6 +5,7 @@ num_workers = rails_env == 'production' ? 5 : 2
num_workers.times do |num|
God.watch do |w|
w.dir = "#{rails_root}"
w.log = "#{rails_root}/log/myprocess.log"
w.name = "resque-#{num}"
w.group = 'resque'
w.interval = 30.seconds

View File

@ -14,7 +14,7 @@ $(document).ready(function() {
/**
 * Loads `url` into `wapper` with jQuery's .load(). If the request reports
 * an error, the container shows a failure notice with a fallback link built
 * from the element's `path` attribute.
 *
 * @param wapper jQuery object that receives the loaded content
 * @param url    address to load
 */
function ajax_load_proc(wapper,url){
  wapper.load(url,function(respText,textSta,XML){
    // Braces keep the failure markup inside the error branch; without them
    // the second html() call ran on every response and replaced content
    // that had loaded successfully.
    if(textSta == 'error'){
      wapper.html("Loading Failed<br/> <a href='"+$(this).attr('path')+"'>Go See</a>");
    }
  });
}

View File

@ -1,31 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- The content of this page will be statically included into the top
of the admin page. Uncomment this as an example to see where the content
will show up.
<hr>
<i>This line will appear before the first table</i>
<tr>
<td colspan="2">
This row will be appended to the end of the first table
</td>
</tr>
<hr>
-->

View File

@ -1,36 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- If this file is found in the config directory, it will only be
loaded once at startup. If it is found in Solr's data
directory, it will be re-loaded every commit.
-->
<elevate>
<query text="foo bar">
<doc id="1" />
<doc id="2" />
<doc id="3" />
</query>
<query text="ipod">
<doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
<doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
</query>
</elevate>

View File

@ -1,246 +0,0 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Syntax:
# "source" => "target"
# "source".length() > 0 (source cannot be empty.)
# "target".length() >= 0 (target can be empty.)
# example:
# "À" => "A"
# "\u00C0" => "A"
# "\u00C0" => "\u0041"
# "ß" => "ss"
# "\t" => " "
# "\n" => ""
# À => A
"\u00C0" => "A"
# Á => A
"\u00C1" => "A"
# Â => A
"\u00C2" => "A"
# Ã => A
"\u00C3" => "A"
# Ä => A
"\u00C4" => "A"
# Å => A
"\u00C5" => "A"
# Æ => AE
"\u00C6" => "AE"
# Ç => C
"\u00C7" => "C"
# È => E
"\u00C8" => "E"
# É => E
"\u00C9" => "E"
# Ê => E
"\u00CA" => "E"
# Ë => E
"\u00CB" => "E"
# Ì => I
"\u00CC" => "I"
# Í => I
"\u00CD" => "I"
# Î => I
"\u00CE" => "I"
# Ï => I
"\u00CF" => "I"
# Ĳ => IJ
"\u0132" => "IJ"
# Ð => D
"\u00D0" => "D"
# Ñ => N
"\u00D1" => "N"
# Ò => O
"\u00D2" => "O"
# Ó => O
"\u00D3" => "O"
# Ô => O
"\u00D4" => "O"
# Õ => O
"\u00D5" => "O"
# Ö => O
"\u00D6" => "O"
# Ø => O
"\u00D8" => "O"
# Œ => OE
"\u0152" => "OE"
# Þ
"\u00DE" => "TH"
# Ù => U
"\u00D9" => "U"
# Ú => U
"\u00DA" => "U"
# Û => U
"\u00DB" => "U"
# Ü => U
"\u00DC" => "U"
# Ý => Y
"\u00DD" => "Y"
# Ÿ => Y
"\u0178" => "Y"
# à => a
"\u00E0" => "a"
# á => a
"\u00E1" => "a"
# â => a
"\u00E2" => "a"
# ã => a
"\u00E3" => "a"
# ä => a
"\u00E4" => "a"
# å => a
"\u00E5" => "a"
# æ => ae
"\u00E6" => "ae"
# ç => c
"\u00E7" => "c"
# è => e
"\u00E8" => "e"
# é => e
"\u00E9" => "e"
# ê => e
"\u00EA" => "e"
# ë => e
"\u00EB" => "e"
# ì => i
"\u00EC" => "i"
# í => i
"\u00ED" => "i"
# î => i
"\u00EE" => "i"
# ï => i
"\u00EF" => "i"
# ĳ => ij
"\u0133" => "ij"
# ð => d
"\u00F0" => "d"
# ñ => n
"\u00F1" => "n"
# ò => o
"\u00F2" => "o"
# ó => o
"\u00F3" => "o"
# ô => o
"\u00F4" => "o"
# õ => o
"\u00F5" => "o"
# ö => o
"\u00F6" => "o"
# ø => o
"\u00F8" => "o"
# œ => oe
"\u0153" => "oe"
# ß => ss
"\u00DF" => "ss"
# þ => th
"\u00FE" => "th"
# ù => u
"\u00F9" => "u"
# ú => u
"\u00FA" => "u"
# û => u
"\u00FB" => "u"
# ü => u
"\u00FC" => "u"
# ý => y
"\u00FD" => "y"
# ÿ => y
"\u00FF" => "y"
# ﬀ => ff
"\uFB00" => "ff"
# ﬁ => fi
"\uFB01" => "fi"
# ﬂ => fl
"\uFB02" => "fl"
# ﬃ => ffi
"\uFB03" => "ffi"
# ﬄ => ffl
"\uFB04" => "ffl"
# ﬅ => ft
"\uFB05" => "ft"
# ﬆ => st
"\uFB06" => "st"

View File

@ -1,21 +0,0 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
# Use a protected word file to protect against the stemmer reducing two
# unrelated words to the same base word.
# Some non-words that normally won't be encountered,
# just to test that they won't be stemmed.
dontstems
zwhacky

View File

@ -1,245 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
This is the Solr schema file. This file should be named "schema.xml" and
should be in the conf directory under the solr home
(i.e. ./solr/conf/schema.xml by default)
or located where the classloader for the Solr webapp can find it.
This example schema is the recommended starting point for users.
It should be kept correct and concise, usable out-of-the-box.
For more information, on how to customize this file, please see
http://wiki.apache.org/solr/SchemaXml
PERFORMANCE NOTE: this schema includes many optional features and should not
be used for benchmarking. To improve performance one could
- set stored="false" for all fields possible (esp large fields) when you
only need to search on the field but don't need to return the original
value.
- set indexed="false" if you don't need to search on the field, but only
return the field as a result of searching on other indexed fields.
- remove all unneeded copyField statements
- for best index size and searching performance, set "index" to false
for all general text fields, use copyField to copy them to the
catchall "text" field, and use that for searching.
- For maximum indexing performance, use the StreamingUpdateSolrServer
java client.
- Remember to run the JVM in server mode, and use a higher logging level
that avoids logging every request
-->
<schema name="sunspot" version="1.0">
<types>
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
attribute and any other attributes determine the real
behavior of the fieldType.
Class names starting with "solr" refer to java classes in the
org.apache.solr.analysis package.
-->
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="string" class="solr.StrField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="tdouble" class="solr.TrieDoubleField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="text" class="solr.TextField" omitNorms="false">
<analyzer>
<tokenizer class="solr.CJKTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.NGramFilterFactory" minGramSize="2" maxGramSize="15"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="boolean" class="solr.BoolField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="date" class="solr.DateField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="sdouble" class="solr.SortableDoubleField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="sfloat" class="solr.SortableFloatField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="sint" class="solr.SortableIntField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="slong" class="solr.SortableLongField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="tint" class="solr.TrieIntField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="tfloat" class="solr.TrieFloatField" omitNorms="true"/>
<!-- *** This fieldType is used by Sunspot! *** -->
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true"/>
</types>
<fields>
<!-- Valid attributes for fields:
name: mandatory - the name for the field
type: mandatory - the name of a previously defined type from the
<types> section
indexed: true if this field should be indexed (searchable or sortable)
stored: true if this field should be retrievable
compressed: [false] if this field should be stored using gzip compression
(this will only apply if the field type is compressable; among
the standard field types, only TextField and StrField are)
multiValued: true if this field may contain multiple values per document
omitNorms: (expert) set to true to omit the norms associated with
this field (this disables length normalization and index-time
boosting for the field, and saves some memory). Only full-text
fields or fields that need an index-time boost need norms.
termVectors: [false] set to true to store the term vector for a
given field.
When using MoreLikeThis, fields used for similarity should be
stored for best performance.
termPositions: Store position information with the term vector.
This will increase storage costs.
termOffsets: Store offset information with the term vector. This
will increase storage costs.
default: a value that should be used if no value is specified
when adding a document.
-->
<!-- *** This field is used by Sunspot! *** -->
<field name="id" stored="true" type="string" multiValued="false" indexed="true"/>
<!-- *** This field is used by Sunspot! *** -->
<field name="type" stored="false" type="string" multiValued="true" indexed="true"/>
<!-- *** This field is used by Sunspot! *** -->
<field name="class_name" stored="false" type="string" multiValued="false" indexed="true"/>
<!-- *** This field is used by Sunspot! *** -->
<field name="text" stored="false" type="string" multiValued="true" indexed="true"/>
<!-- *** This field is used by Sunspot! *** -->
<field name="lat" stored="true" type="tdouble" multiValued="false" indexed="true"/>
<!-- *** This field is used by Sunspot! *** -->
<field name="lng" stored="true" type="tdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="random_*" stored="false" type="rand" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="_local*" stored="false" type="tdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_text" stored="false" type="text" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_texts" stored="true" type="text" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_b" stored="false" type="boolean" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_bm" stored="false" type="boolean" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_bs" stored="true" type="boolean" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_bms" stored="true" type="boolean" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_d" stored="false" type="date" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dm" stored="false" type="date" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ds" stored="true" type="date" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dms" stored="true" type="date" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_e" stored="false" type="sdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_em" stored="false" type="sdouble" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_es" stored="true" type="sdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ems" stored="true" type="sdouble" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_f" stored="false" type="sfloat" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_fm" stored="false" type="sfloat" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_fs" stored="true" type="sfloat" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_fms" stored="true" type="sfloat" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_i" stored="false" type="sint" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_im" stored="false" type="sint" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_is" stored="true" type="sint" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ims" stored="true" type="sint" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_l" stored="false" type="slong" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_lm" stored="false" type="slong" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ls" stored="true" type="slong" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_lms" stored="true" type="slong" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_s" stored="false" type="string" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_sm" stored="false" type="string" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ss" stored="true" type="string" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_sms" stored="true" type="string" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_it" stored="false" type="tint" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_itm" stored="false" type="tint" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_its" stored="true" type="tint" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_itms" stored="true" type="tint" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ft" stored="false" type="tfloat" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ftm" stored="false" type="tfloat" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_fts" stored="true" type="tfloat" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ftms" stored="true" type="tfloat" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dt" stored="false" type="tdate" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dtm" stored="false" type="tdate" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dts" stored="true" type="tdate" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_dtms" stored="true" type="tdate" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_textv" stored="false" termVectors="true" type="text" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_textsv" stored="true" termVectors="true" type="text" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_et" stored="false" termVectors="true" type="tdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_etm" stored="false" termVectors="true" type="tdouble" multiValued="true" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_ets" stored="true" termVectors="true" type="tdouble" multiValued="false" indexed="true"/>
<!-- *** This dynamicField is used by Sunspot! *** -->
<dynamicField name="*_etms" stored="true" termVectors="true" type="tdouble" multiValued="true" indexed="true"/>
</fields>
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
<uniqueKey>id</uniqueKey>
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
<defaultSearchField>text</defaultSearchField>
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
<solrQueryParser defaultOperator="AND"/>
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field differently,
or to add multiple fields to the same field for easier/faster searching. -->
</schema>

View File

@ -1,24 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
user=
solr_hostname=localhost
solr_port=8983
rsyncd_port=18983
data_dir=
webapp_name=solr
master_host=
master_data_dir=
master_status_dir=

View File

@ -1,938 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
For more details about configurations options that may appear in this
file, see http://wiki.apache.org/solr/SolrConfigXml.
Specifically, the Solr Config can support XInclude, which may make it easier to manage
the configuration. See https://issues.apache.org/jira/browse/SOLR-1167
-->
<config>
<!-- Set this to 'false' if you want solr to continue working after it has
encountered a severe configuration error. In a production environment,
you may want solr to keep working even if one handler is mis-configured.
You may also set this to false by setting the system property:
-Dsolr.abortOnConfigurationError=false
-->
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
<!-- lib directives can be used to instruct Solr to load any Jars identified
and use them to resolve any "plugins" specified in your solrconfig.xml or
schema.xml (ie: Analyzers, Request Handlers, etc...).
All directories and paths are resolved relative to the instanceDir.
If a "./lib" directory exists in your instanceDir, all files found in it
are included as if you had used the following syntax...
<lib dir="./lib" />
-->
<!-- A dir option by itself adds any files found in the directory to the
classpath, this is useful for including all jars in a directory.
-->
<lib dir="../../contrib/extraction/lib"/>
<!-- When a regex is specified in addition to a directory, only the files in that
directory which completely match the regex (anchored on both ends)
will be included.
-->
<lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar"/>
<lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar"/>
<!-- If a dir option (with or without a regex) is used and nothing is found
that matches, it will be ignored
-->
<lib dir="../../contrib/clustering/lib/downloads/"/>
<lib dir="../../contrib/clustering/lib/"/>
<lib dir="/total/crap/dir/ignored"/>
<!-- an exact path can be used to specify a specific file. This will cause
a serious error to be logged if it can't be loaded.
<lib path="../a-jar-that-does-not-exist.jar" />
-->
<!-- Used to specify an alternate directory to hold all index data
other than the default ./data under the Solr home.
If replication is in use, this should match the replication configuration. -->
<dataDir>${solr.data.dir:./solr/data}</dataDir>
<!-- WARNING: this <indexDefaults> section only provides defaults for index writers
in general. See also the <mainIndex> section after that when changing parameters
for Solr's main Lucene index. -->
<indexDefaults>
<!-- Values here affect all index writers and act as a default unless overridden. -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<!-- If both ramBufferSizeMB and maxBufferedDocs are set, then Lucene will flush
based on whichever limit is hit first. -->
<!--<maxBufferedDocs>1000</maxBufferedDocs>-->
<!-- Sets the amount of RAM that may be used by Lucene indexing
for buffering added documents and deletions before they are
flushed to the Directory. -->
<ramBufferSizeMB>32</ramBufferSizeMB>
<!-- <maxMergeDocs>2147483647</maxMergeDocs> -->
<maxFieldLength>10000</maxFieldLength>
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
<!--
Expert: Turn on Lucene's auto commit capability. This causes intermediate
segment flushes to write a new lucene index descriptor, enabling it to be
opened by an external IndexReader. This can greatly slow down indexing
speed. NOTE: Despite the name, this value does not have any relation to
Solr's autoCommit functionality
-->
<!--<luceneAutoCommit>false</luceneAutoCommit>-->
<!--
Expert: The Merge Policy in Lucene controls how merging is handled by
Lucene. The default in 2.3 is the LogByteSizeMergePolicy, previous
versions used LogDocMergePolicy.
LogByteSizeMergePolicy chooses segments to merge based on their size. The
Lucene 2.2 default, LogDocMergePolicy chose when to merge based on number
of documents
Other implementations of MergePolicy must have a no-argument constructor
-->
<!--<mergePolicy class="org.apache.lucene.index.LogByteSizeMergePolicy"/>-->
<!--
Expert:
The Merge Scheduler in Lucene controls how merges are performed. The
ConcurrentMergeScheduler (Lucene 2.3 default) can perform merges in the
background using separate threads. The SerialMergeScheduler (Lucene 2.2
default) does not.
-->
<!--<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>-->
<!--
This option specifies which Lucene LockFactory implementation to use.
single = SingleInstanceLockFactory - suggested for a read-only index
or when there is no possibility of another process trying
to modify the index.
native = NativeFSLockFactory - uses OS native file locking
simple = SimpleFSLockFactory - uses a plain file for locking
(For backwards compatibility with Solr 1.2, 'simple' is the default
if not specified.)
-->
<lockType>native</lockType>
<!--
Expert:
Controls how often Lucene loads terms into memory -->
<!--<termIndexInterval>256</termIndexInterval>-->
</indexDefaults>
<mainIndex>
<!-- options specific to the main on-disk lucene index -->
<useCompoundFile>false</useCompoundFile>
<ramBufferSizeMB>32</ramBufferSizeMB>
<mergeFactor>10</mergeFactor>
<!-- Deprecated -->
<!--<maxBufferedDocs>1000</maxBufferedDocs>-->
<!--<maxMergeDocs>2147483647</maxMergeDocs>-->
<!-- inherit from indexDefaults <maxFieldLength>10000</maxFieldLength> -->
<!-- If true, unlock any held write or commit locks on startup.
This defeats the locking mechanism that allows multiple
processes to safely access a lucene index, and should be
used with care.
This is not needed if lock type is 'none' or 'single'
-->
<unlockOnStartup>false</unlockOnStartup>
<!-- If true, IndexReaders will be reopened (often more efficient) instead
of closed and then opened. -->
<reopenReaders>true</reopenReaders>
<!--
Expert:
Controls how often Lucene loads terms into memory. Default is 128 and is likely good for most everyone. -->
<!--<termIndexInterval>256</termIndexInterval>-->
<!--
Custom deletion policies can be specified here. The class must
implement org.apache.lucene.index.IndexDeletionPolicy.
http://lucene.apache.org/java/2_3_2/api/org/apache/lucene/index/IndexDeletionPolicy.html
The standard Solr IndexDeletionPolicy implementation supports deleting
index commit points on number of commits, age of commit point and
optimized status.
The latest commit point should always be preserved regardless
of the criteria.
-->
<deletionPolicy class="solr.SolrDeletionPolicy">
<!-- The number of commit points to be kept -->
<str name="maxCommitsToKeep">1</str>
<!-- The number of optimized commit points to be kept -->
<str name="maxOptimizedCommitsToKeep">0</str>
<!--
Delete all commit points once they have reached the given age.
Supports DateMathParser syntax e.g.
<str name="maxCommitAge">30MINUTES</str>
<str name="maxCommitAge">1DAY</str>
-->
</deletionPolicy>
<!-- To aid in advanced debugging, you may turn on IndexWriter debug logging.
Setting to true will set the file that the underlying Lucene IndexWriter
will write its debug infostream to. -->
<infoStream file="INFOSTREAM.txt">false</infoStream>
</mainIndex>
<!-- Enables JMX if and only if an existing MBeanServer is found, use this
if you want to configure JMX through JVM parameters. Remove this to disable
exposing Solr configuration and statistics to JMX.
If you want to connect to a particular server, specify the agentId
e.g. <jmx agentId="myAgent" />
If you want to start a new MBeanServer, specify the serviceUrl
e.g <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
For more details see http://wiki.apache.org/solr/SolrJmx
-->
<jmx/>
<!-- the default high-performance update handler -->
<updateHandler class="solr.DirectUpdateHandler2">
<!-- A prefix of "solr." for class names is an alias that
causes solr to search appropriate packages, including
org.apache.solr.(search|update|request|core|analysis)
-->
<!-- Perform a <commit/> automatically under certain conditions:
maxDocs - number of updates since last commit is greater than this
maxTime - oldest uncommitted update (in ms) is this long ago
Instead of enabling autoCommit, consider using "commitWithin"
when adding documents. http://wiki.apache.org/solr/UpdateXmlMessages
<autoCommit>
<maxDocs>10000</maxDocs>
<maxTime>1000</maxTime>
</autoCommit>
-->
<!-- The RunExecutableListener executes an external command from a
hook such as postCommit or postOptimize.
exe - the name of the executable to run
dir - dir to use as the current working directory. default="."
wait - the calling thread waits until the executable returns. default="true"
args - the arguments to pass to the program. default=nothing
env - environment variables to set. default=nothing
-->
<!-- A postCommit event is fired after every commit or optimize command
<listener event="postCommit" class="solr.RunExecutableListener">
<str name="exe">solr/bin/snapshooter</str>
<str name="dir">.</str>
<bool name="wait">true</bool>
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
<arr name="env"> <str>MYVAR=val1</str> </arr>
</listener>
-->
<!-- A postOptimize event is fired only after every optimize command
<listener event="postOptimize" class="solr.RunExecutableListener">
<str name="exe">snapshooter</str>
<str name="dir">solr/bin</str>
<bool name="wait">true</bool>
</listener>
-->
</updateHandler>
<!-- Use the following format to specify a custom IndexReaderFactory - allows for alternate
IndexReader implementations.
** Experimental Feature **
Please note - Using a custom IndexReaderFactory may prevent certain other features
from working. The API to IndexReaderFactory may change without warning or may even
be removed from future releases if the problems cannot be resolved.
** Features that may not work with custom IndexReaderFactory **
The ReplicationHandler assumes a disk-resident index. Using a custom
IndexReader implementation may cause incompatibility with ReplicationHandler and
may cause replication to not work correctly. See SOLR-1366 for details.
<indexReaderFactory name="IndexReaderFactory" class="package.class">
Parameters as required by the implementation
</indexReaderFactory >
-->
<!-- To set the termInfosIndexDivisor, do this: -->
<!--<indexReaderFactory name="IndexReaderFactory" class="org.apache.solr.core.StandardIndexReaderFactory">
<int name="termInfosIndexDivisor">12</int>
</indexReaderFactory >-->
<query>
<!-- Maximum number of clauses in a boolean query... in the past, this affected
range or prefix queries that expanded to big boolean queries - built in Solr
query parsers no longer create queries with this limitation.
An exception is thrown if exceeded. -->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- There are two implementations of cache available for Solr,
LRUCache, based on a synchronized LinkedHashMap, and
FastLRUCache, based on a ConcurrentHashMap. FastLRUCache has faster gets
and slower puts in single threaded operation and thus is generally faster
than LRUCache when the hit ratio of the cache is high (> 75%), and may be
faster under other scenarios on multi-cpu systems. -->
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
unordered sets of *all* documents that match a query.
When a new searcher is opened, its caches may be prepopulated
or "autowarmed" using data from caches in the old searcher.
autowarmCount is the number of items to prepopulate. For LRUCache,
the autowarmed items will be the most recently accessed items.
Parameters:
class - the SolrCache implementation LRUCache or FastLRUCache
size - the maximum number of entries in the cache
initialSize - the initial capacity (number of entries) of
the cache. (see java.util.HashMap)
autowarmCount - the number of entries to prepopulate from
an old cache.
-->
<filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0"/>
<!-- Cache used to hold field values that are quickly accessible
by document id. The fieldValueCache is created by default
even if not configured here.
<fieldValueCache
class="solr.FastLRUCache"
size="512"
autowarmCount="128"
showItems="32"
/>
-->
<!-- queryResultCache caches results of searches - ordered lists of
document ids (DocList) based on a query, a sort, and the range
of documents requested. -->
<queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
<!-- documentCache caches Lucene Document objects (the stored fields for each document).
Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
<documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
<!-- If true, stored fields that are not requested will be loaded lazily.
This can result in a significant speed improvement if the usual case is to
not load all stored fields, especially if the skipped fields are large
compressed text fields.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!-- Example of a generic cache. These caches may be accessed by name
through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert().
The purpose is to enable easy caching of user/application level data.
The regenerator argument should be specified as an implementation
of solr.search.CacheRegenerator if autowarming is desired. -->
<!--
<cache name="myUserCache"
class="solr.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="org.mycompany.mypackage.MyRegenerator"
/>
-->
<!-- An optimization that attempts to use a filter to satisfy a search.
If the requested sort does not include score, then the filterCache
will be checked for a filter matching the query. If found, the filter
will be used as the source of document ids, and then the sort will be
applied to that.
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<!-- An optimization for use with the queryResultCache. When a search
is requested, a superset of the requested number of document ids
are collected. For example, if a search for a particular query
requests matching documents 10 through 19, and queryResultWindowSize is 50,
then documents 0 through 49 will be collected and cached. Any further
requests in that range can be satisfied via the cache. -->
<queryResultWindowSize>20</queryResultWindowSize>
<!-- Maximum number of documents to cache for any entry in the
queryResultCache. -->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered).
It can be used to prime certain caches to prevent long request times for
certain requests.
-->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<!--
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>
-->
</arr>
</listener>
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain autowarming data from. -->
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst>
<str name="q">solr rocks</str>
<str name="start">0</str>
<str name="rows">10</str>
</lst>
<lst>
<str name="q">static firstSearcher warming query from solrconfig.xml</str>
</lst>
</arr>
</listener>
<!-- If a search request comes in and there is no current registered searcher,
then immediately register the still warming searcher and use it. If
"false" then all requests will block until the first searcher is done
warming. -->
<useColdSearcher>false</useColdSearcher>
<!-- Maximum number of searchers that may be warming in the background
concurrently. An error is returned if this limit is exceeded. Recommend
1-2 for read-only slaves, higher for masters w/o cache warming. -->
<maxWarmingSearchers>2</maxWarmingSearchers>
</query>
<!--
Let the dispatch filter handle /select?qt=XXX
handleSelect=true will use consistent error handling for /select and /update
handleSelect=false will use solr1.1 style error formatting
-->
<requestDispatcher handleSelect="true">
<!--Make sure your system has some authentication before enabling remote streaming! -->
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048000"/>
<!-- Set HTTP caching related parameters (for proxy caches and clients).
To get the behaviour of Solr 1.2 (ie: no caching related headers)
use the never304="true" option and do not specify a value for
<cacheControl>
-->
<!-- <httpCaching never304="true"> -->
<httpCaching lastModifiedFrom="openTime" etagSeed="Solr">
<!-- lastModifiedFrom="openTime" is the default, the Last-Modified value
(and validation against If-Modified-Since requests) will all be
relative to when the current Searcher was opened.
You can change it to lastModifiedFrom="dirLastMod" if you want the
value to exactly correspond to when the physical index was last
modified.
etagSeed="..." is an option you can change to force the ETag
header (and validation against If-None-Match requests) to be
different even if the index has not changed (ie: when making
significant changes to your config file)
lastModifiedFrom and etagSeed are both ignored if you use the
never304="true" option.
-->
<!-- If you include a <cacheControl> directive, it will be used to
generate a Cache-Control header, as well as an Expires header
if the value contains "max-age="
By default, no Cache-Control header is generated.
You can use the <cacheControl> option even if you have set
never304="true"
-->
<!-- <cacheControl>max-age=30, public</cacheControl> -->
</httpCaching>
</requestDispatcher>
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the path or the qt (query type) param.
Names starting with a '/' are accessed with a path equal to the
registered name. Names without a leading '/' are accessed with:
http://host/app/select?qt=name
If no qt is defined, the requestHandler that declares default="true"
will be used.
-->
<requestHandler name="standard" class="solr.SearchHandler" default="true">
<!-- default values for query parameters -->
<lst name="defaults">
<str name="echoParams">explicit</str>
<!--
<int name="rows">10</int>
<str name="fl">*</str>
<str name="version">2.1</str>
-->
</lst>
</requestHandler>
<!-- Please refer to http://wiki.apache.org/solr/SolrReplication for details on configuring replication -->
<!-- remove the <lst name="master"> section if this is just a slave -->
<!-- remove the <lst name="slave"> section if this is just a master -->
<!--
<requestHandler name="/replication" class="solr.ReplicationHandler" >
<lst name="master">
<str name="replicateAfter">commit</str>
<str name="replicateAfter">startup</str>
<str name="confFiles">schema.xml,stopwords.txt</str>
</lst>
<lst name="slave">
<str name="masterUrl">http://localhost:8983/solr/replication</str>
<str name="pollInterval">00:00:60</str>
</lst>
</requestHandler>-->
<!-- DisMaxRequestHandler allows easy searching across multiple fields
for simple user-entered phrases. Its implementation is now
just the standard SearchHandler with a default query type
of "dismax".
see http://wiki.apache.org/solr/DisMaxRequestHandler
-->
<requestHandler name="dismax" class="solr.SearchHandler">
<lst name="defaults">
<str name="defType">dismax</str>
<str name="echoParams">explicit</str>
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
</str>
<str name="pf">
text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9
</str>
<str name="bf">
popularity^0.5 recip(price,1,1000,1000)^0.3
</str>
<str name="fl">
id,name,price,score
</str>
<str name="mm">
2&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
<str name="q.alt">*:*</str>
<!-- example highlighter config, enable per-query with hl=true -->
<str name="hl.fl">text features name</str>
<!-- for this field, we want no fragmenting, just highlighting -->
<str name="f.name.hl.fragsize">0</str>
<!-- instructs Solr to return the field itself if no query terms are
found -->
<str name="f.name.hl.alternateField">name</str>
<str name="f.text.hl.fragmenter">regex</str>
<!-- defined below -->
</lst>
</requestHandler>
<!-- Note how you can register the same handler multiple times with
different names (and different init parameters)
-->
<requestHandler name="partitioned" class="solr.SearchHandler">
<lst name="defaults">
<str name="defType">dismax</str>
<str name="echoParams">explicit</str>
<str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str>
<str name="mm">2&lt;-1 5&lt;-2 6&lt;90%</str>
<!-- This is an example of using Date Math to specify a constantly
moving date range in a config...
-->
<str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str>
</lst>
<!-- In addition to defaults, "appends" params can be specified
to identify values which should be appended to the list of
multi-val params from the query (or the existing "defaults").
In this example, the param "fq=instock:true" will be appended to
any query time fq params the user may specify, as a mechanism for
partitioning the index, independent of any user selected filtering
that may also be desired (perhaps as a result of faceted searching).
NOTE: there is *absolutely* nothing a client can do to prevent these
"appends" values from being used, so don't use this mechanism
unless you are sure you always want it.
-->
<lst name="appends">
<str name="fq">inStock:true</str>
</lst>
<!-- "invariants" are a way of letting the Solr maintainer lock down
the options available to Solr clients. Any params values
specified here are used regardless of what values may be specified
in either the query, the "defaults", or the "appends" params.
In this example, the facet.field and facet.query params are fixed,
limiting the facets clients can use. Faceting is not turned on by
default - but if the client does specify facet=true in the request,
these are the only facets they will be able to see counts for;
regardless of what other facet.field or facet.query params they
may specify.
NOTE: there is *absolutely* nothing a client can do to prevent these
"invariants" values from being used, so don't use this mechanism
unless you are sure you always want it.
-->
<lst name="invariants">
<str name="facet.field">cat</str>
<str name="facet.field">manu_exact</str>
<str name="facet.query">price:[* TO 500]</str>
<str name="facet.query">price:[500 TO *]</str>
</lst>
</requestHandler>
<searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
<searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />
<!--
Search components are registered to SolrCore and used by Search Handlers
By default, the following components are available:
<searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
<searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
<searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
<searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
<searchComponent name="stats" class="org.apache.solr.handler.component.StatsComponent" />
<searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />
Default configuration in a requestHandler would look like:
<arr name="components">
<str>query</str>
<str>facet</str>
<str>mlt</str>
<str>highlight</str>
<str>stats</str>
<str>debug</str>
</arr>
If you register a searchComponent to one of the standard names, that will be used instead.
To insert components before or after the 'standard' components, use:
<arr name="first-components">
<str>myFirstComponentName</str>
</arr>
<arr name="last-components">
<str>myLastComponentName</str>
</arr>
-->
<!-- The spell check component can return a list of alternative spelling
suggestions. -->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
<str name="queryAnalyzerFieldType">textSpell</str>
<lst name="spellchecker">
<str name="name">default</str>
<str name="field">name</str>
<str name="spellcheckIndexDir">./spellchecker</str>
</lst>
<!-- a spellchecker that uses a different distance measure
<lst name="spellchecker">
<str name="name">jarowinkler</str>
<str name="field">spell</str>
<str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str>
<str name="spellcheckIndexDir">./spellchecker2</str>
</lst>
-->
<!-- a file based spell checker
<lst name="spellchecker">
<str name="classname">solr.FileBasedSpellChecker</str>
<str name="name">file</str>
<str name="sourceLocation">spellings.txt</str>
<str name="characterEncoding">UTF-8</str>
<str name="spellcheckIndexDir">./spellcheckerFile</str>
</lst>
-->
</searchComponent>
<!-- A request handler utilizing the spellcheck component.
#############################################################################
NOTE: This is purely as an example. The whole purpose of the
SpellCheckComponent is to hook it into the request handler that handles (i.e.
the standard or dismax SearchHandler) queries such that a separate request is
not needed to get suggestions.
IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU
WANT FOR YOUR PRODUCTION SYSTEM!
#############################################################################
-->
<requestHandler name="/spell" class="solr.SearchHandler" lazy="true">
<lst name="defaults">
<!-- omp = Only More Popular -->
<str name="spellcheck.onlyMorePopular">false</str>
<!-- exr = Extended Results -->
<str name="spellcheck.extendedResults">false</str>
<!-- The number of suggestions to return -->
<str name="spellcheck.count">1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
<!-- A Req Handler for working with the tvComponent. This is purely as an example.
You will likely want to add the component to your already specified request handlers. -->
<requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<bool name="tv">true</bool>
</lst>
<arr name="last-components">
<str>tvComponent</str>
</arr>
</requestHandler>
<!-- Clustering Component
http://wiki.apache.org/solr/ClusteringComponent
This relies on third party jars which are not included in the release.
To use this component (and the "/clustering" handler)
Those jars will need to be downloaded, and you'll need to set the
solr.clustering.enabled system property when running solr...
java -Dsolr.clustering.enabled=true -jar start.jar
-->
<searchComponent name="clusteringComponent" enable="${solr.clustering.enabled:false}" class="org.apache.solr.handler.clustering.ClusteringComponent">
<!-- Declare an engine -->
<lst name="engine">
<!-- The name, only one can be named "default" -->
<str name="name">default</str>
<!--
Class name of Carrot2 clustering algorithm. Currently available algorithms are:
* org.carrot2.clustering.lingo.LingoClusteringAlgorithm
* org.carrot2.clustering.stc.STCClusteringAlgorithm
See http://project.carrot2.org/algorithms.html for the algorithm's characteristics.
-->
<str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
<!--
Overriding values for Carrot2 default algorithm attributes. For a description
of all available attributes, see: http://download.carrot2.org/stable/manual/#chapter.components.
Use attribute key as name attribute of str elements below. These can be further
overridden for individual requests by specifying attribute key as request
parameter name and attribute value as parameter value.
-->
<str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
</lst>
<lst name="engine">
<str name="name">stc</str>
<str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
</lst>
</searchComponent>
<requestHandler name="/clustering" enable="${solr.clustering.enabled:false}" class="solr.SearchHandler">
<lst name="defaults">
<bool name="clustering">true</bool>
<str name="clustering.engine">default</str>
<bool name="clustering.results">true</bool>
<!-- The title field -->
<str name="carrot.title">name</str>
<str name="carrot.url">id</str>
<!-- The field to cluster on -->
<str name="carrot.snippet">features</str>
<!-- produce summaries -->
<bool name="carrot.produceSummary">true</bool>
<!-- the maximum number of labels per cluster -->
<!--<int name="carrot.numDescriptions">5</int>-->
<!-- produce sub clusters -->
<bool name="carrot.outputSubClusters">false</bool>
</lst>
<arr name="last-components">
<str>clusteringComponent</str>
</arr>
</requestHandler>
<!-- Solr Cell: http://wiki.apache.org/solr/ExtractingRequestHandler -->
<requestHandler name="/update/extract" class="org.apache.solr.handler.extraction.ExtractingRequestHandler" startup="lazy">
<lst name="defaults">
<!-- All the main content goes into "text"... if you need to return
the extracted text or do highlighting, use a stored field. -->
<str name="fmap.content">text</str>
<str name="lowernames">true</str>
<str name="uprefix">ignored_</str>
<!-- capture link hrefs but ignore div attributes -->
<str name="captureAttr">true</str>
<str name="fmap.a">links</str>
<str name="fmap.div">ignored_</str>
</lst>
</requestHandler>
<!-- A component to return terms and document frequency of those terms.
This component does not yet support distributed search. -->
<searchComponent name="termsComponent" class="org.apache.solr.handler.component.TermsComponent"/>
<requestHandler name="/terms" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<bool name="terms">true</bool>
</lst>
<arr name="components">
<str>termsComponent</str>
</arr>
</requestHandler>
<!-- a search component that enables you to configure the top results for
a given query regardless of the normal lucene scoring.-->
<searchComponent name="elevator" class="solr.QueryElevationComponent">
<!-- pick a fieldType to analyze queries -->
<str name="queryFieldType">string</str>
<str name="config-file">elevate.xml</str>
</searchComponent>
<!-- a request handler utilizing the elevator component -->
<requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<str name="echoParams">explicit</str>
</lst>
<arr name="last-components">
<str>elevator</str>
</arr>
</requestHandler>
<!-- Update request handler.
Note: Since solr1.1 requestHandlers requires a valid content type header if posted in
the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
The response format differs from solr1.1 formatting and returns a standard error code.
To enable solr1.1 behavior, remove the /update handler or change its path
-->
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler"/>
<requestHandler name="/update/javabin" class="solr.BinaryUpdateRequestHandler"/>
<!--
Analysis request handler. Since Solr 1.3. Use to return how a document is analyzed. Useful
for debugging and as a token server for other types of applications.
This is deprecated in favor of the improved DocumentAnalysisRequestHandler and FieldAnalysisRequestHandler
<requestHandler name="/analysis" class="solr.AnalysisRequestHandler" />
-->
<!--
An analysis handler that provides a breakdown of the analysis process of provided documents. This handler expects a
(single) content stream with the following format:
<docs>
<doc>
<field name="id">1</field>
<field name="name">The Name</field>
<field name="text">The Text Value</field>
</doc>
<doc>...</doc>
<doc>...</doc>
...
</docs>
Note: Each document must contain a field which serves as the unique key. This key is used in the returned
response to associate an analysis breakdown with the analyzed document.
Like the FieldAnalysisRequestHandler, this handler also supports query analysis by
sending either an "analysis.query" or "q" request parameter that holds the query text to be analyzed. It also
supports the "analysis.showmatch" parameter which when set to true, all field tokens that match the query
tokens will be marked as a "match".
-->
<requestHandler name="/analysis/document" class="solr.DocumentAnalysisRequestHandler"/>
<!--
RequestHandler that provides much the same functionality as analysis.jsp. Provides the ability
to specify multiple field types and field names in the same request and outputs index-time and
query-time analysis for each of them.
Request parameters are:
analysis.fieldname - The field name whose analyzers are to be used
analysis.fieldtype - The field type whose analyzers are to be used
analysis.fieldvalue - The text for index-time analysis
q (or analysis.query) - The text for query time analysis
analysis.showmatch (true|false) - When set to true and when query analysis is performed, the produced
tokens of the field value analysis will be marked as "matched" for every
token that is produced by the query analysis
-->
<requestHandler name="/analysis/field" class="solr.FieldAnalysisRequestHandler"/>
<!-- CSV update handler, loaded on demand -->
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy"/>
<!--
Admin Handlers - This will register all the standard admin RequestHandlers. Adding
this single handler is equivalent to registering:
<requestHandler name="/admin/luke" class="org.apache.solr.handler.admin.LukeRequestHandler" />
<requestHandler name="/admin/system" class="org.apache.solr.handler.admin.SystemInfoHandler" />
<requestHandler name="/admin/plugins" class="org.apache.solr.handler.admin.PluginInfoHandler" />
<requestHandler name="/admin/threads" class="org.apache.solr.handler.admin.ThreadDumpHandler" />
<requestHandler name="/admin/properties" class="org.apache.solr.handler.admin.PropertiesRequestHandler" />
<requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" />
If you wish to hide files under ${solr.home}/conf, explicitly register the ShowFileRequestHandler using:
<requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" >
<lst name="invariants">
<str name="hidden">synonyms.txt</str>
<str name="hidden">anotherfile.txt</str>
</lst>
</requestHandler>
-->
<requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers"/>
<!-- ping/healthcheck -->
<requestHandler name="/admin/ping" class="PingRequestHandler">
<lst name="defaults">
<str name="qt">standard</str>
<str name="q">solrpingquery</str>
<str name="echoParams">all</str>
</lst>
</requestHandler>
<!-- Echo the request contents back to the client -->
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
<!-- for all params (including the default etc) use: 'all' -->
<str name="echoHandler">true</str>
</lst>
</requestHandler>
<highlighting>
<!-- Configure the standard fragmenter -->
<!-- This could most likely be commented out in the "default" case -->
<fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
<lst name="defaults">
<int name="hl.fragsize">100</int>
</lst>
</fragmenter>
<!-- A regular-expression-based fragmenter (f.i., for sentence extraction) -->
<fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
<lst name="defaults">
<!-- slightly smaller fragsizes work better because of slop -->
<int name="hl.fragsize">70</int>
<!-- allow 50% slop on fragment sizes -->
<float name="hl.regex.slop">0.5</float>
<!-- a basic sentence pattern -->
<str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
</lst>
</fragmenter>
<!-- Configure the standard formatter -->
<formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
<lst name="defaults">
<str name="hl.simple.pre"><![CDATA[<em>]]></str>
<str name="hl.simple.post"><![CDATA[</em>]]></str>
</lst>
</formatter>
</highlighting>
<!-- An example dedup update processor that creates the "id" field on the fly
based on the hash code of some other fields. This example has overwriteDupes
set to false since we are using the id field as the signatureField and Solr
will maintain uniqueness based on that anyway.
You have to link the chain to an update handler above to use it ie:
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler">
<lst name="defaults">
<str name="update.processor">dedupe</str>
</lst>
</requestHandler>
-->
<!--
<updateRequestProcessorChain name="dedupe">
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">true</bool>
<str name="signatureField">id</str>
<bool name="overwriteDupes">false</bool>
<str name="fields">name,features,cat</str>
<str name="signatureClass">org.apache.solr.update.processor.Lookup3Signature</str>
</processor>
<processor class="solr.LogUpdateProcessorFactory" />
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
-->
<!-- queryResponseWriter plugins... query responses will be written using the
writer specified by the 'wt' request parameter matching the name of a registered
writer.
The "default" writer is the default and will be used if 'wt' is not specified
in the request. XMLResponseWriter will be used if nothing is specified here.
The json, python, and ruby writers are also available by default.
<queryResponseWriter name="xml" class="org.apache.solr.request.XMLResponseWriter" default="true"/>
<queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/>
<queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/>
<queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/>
<queryResponseWriter name="php" class="org.apache.solr.request.PHPResponseWriter"/>
<queryResponseWriter name="phps" class="org.apache.solr.request.PHPSerializedResponseWriter"/>
<queryResponseWriter name="custom" class="com.example.MyResponseWriter"/>
-->
<!-- XSLT response writer transforms the XML output by any xslt file found
in Solr's conf/xslt directory. Changes to xslt files are checked for
every xsltCacheLifetimeSeconds.
-->
<queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter">
<int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<!-- example of registering a query parser
<queryParser name="lucene" class="org.apache.solr.search.LuceneQParserPlugin"/>
-->
<!-- example of registering a custom function parser
<valueSourceParser name="myfunc" class="com.mycompany.MyValueSourceParser" />
-->
<!-- config for the admin interface -->
<admin>
<defaultQuery>solr</defaultQuery>
<!-- configure a healthcheck file for servers behind a loadbalancer
<healthcheck type="file">server-enabled</healthcheck>
-->
</admin>
<requestHandler class="solr.MoreLikeThisHandler" name="/mlt">
<lst name="defaults">
<str name="mlt.mintf">1</str>
<str name="mlt.mindf">2</str>
</lst>
</requestHandler>
</config>

View File

@ -1,2 +0,0 @@
pizza
history

View File

@ -1,58 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
# a couple of test stopwords to test that the words are really being
# configured from this file:
stopworda
stopwordb
#Standard english stop words taken from Lucene's StopAnalyzer
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
s
such
t
that
the
their
then
there
these
they
this
to
was
will
with

View File

@ -1,31 +0,0 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
#some test synonym mappings unlikely to appear in real input text
aaa => aaaa
bbb => bbbb1 bbbb2
ccc => cccc1,cccc2
a\=>a => b\=>b
a\,a => b\,b
fooaaa,baraaa,bazaaa
# Some synonym groups specific to this example
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
Television, Televisions, TV, TVs
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
#after us won't split it into two words.
# Synonym mappings can be used for spelling correction too
pixima => pixma

885
tmp/tire-dsl.rb Normal file
View File

@ -0,0 +1,885 @@
# encoding: UTF-8
#
# **Tire** provides rich and comfortable Ruby API for the
# [_ElasticSearch_](http://www.elasticsearch.org/) search engine/database.
#
# _ElasticSearch_ is a scalable, distributed, cloud-ready, highly-available
# full-text search engine and database, communicating by JSON over RESTful HTTP,
# based on [Lucene](http://lucene.apache.org/), written in Java.
#
# <img src="http://github.com/favicon.ico" style="position:relative; top:2px">
# _Tire_ is open source, and you can download or clone the source code
# from <https://github.com/karmi/tire>.
#
# By following these instructions you should have the search running
# on a sane operating system in less than 10 minutes.
# Note, that this file can be executed directly:
#
# ruby -I lib examples/tire-dsl.rb
#
#### Installation
# Install _Tire_ with _Rubygems_:
#
# gem install tire
#
require 'rubygems'
require 'colorize'
# _Tire_ uses the [_multi_json_](https://github.com/intridea/multi_json) gem as a generic JSON library.
# We want to use the [_yajl-ruby_](https://github.com/brianmario/yajl-ruby) gem in its full on mode here.
#
require 'yajl/json_gem'
# Now, let's require the _Tire_ gem itself, and we're ready to go.
#
require 'tire'
#### Prerequisites
# We'll need a working and running _ElasticSearch_ server, of course. Thankfully, that's easy.
# Probe the local ElasticSearch HTTP endpoint. Any error raised while doing so
# is treated the same as "server not reachable".
elasticsearch_up = begin
  RestClient.get('http://localhost:9200')
rescue StandardError
  false
end
# Nothing below can work without the server, so print the installation
# instructions and terminate with a non-zero exit status.
unless elasticsearch_up
  puts <<-"INSTALL"
[ERROR] You dont appear to have ElasticSearch installed. Please install and launch it with the following commands:
curl -k -L -o elasticsearch-0.19.0.tar.gz http://github.com/downloads/elasticsearch/elasticsearch/elasticsearch-0.19.0.tar.gz
tar -zxvf elasticsearch-0.19.0.tar.gz
./elasticsearch-0.19.0/bin/elasticsearch -f
INSTALL
  exit(1)
end
### Storing and indexing documents
# Let's initialize an index named “articles”.
#
Tire.index 'articles' do
# To make sure it's fresh, let's delete any existing index with the same name.
#
delete
# And then, let's create it.
#
create
# We want to store and index some articles with `title`, `tags` and `published_on` properties.
# Simple Hashes are OK. The default type is „document”.
#
store :title => '復興「校球」 政大男足決戰UFA足球聯賽', :tags => ['足球'], :published_on => '2011-01-01'
store :title => '社科院舉辦碩博士班畢業生撥穗典禮', :tags => ['博士班', '畢業'], :published_on => '2011-01-02'
# We usually want to set a specific _type_ for the document in _ElasticSearch_.
# Simply setting a `type` property is OK.
#
store :type => 'article',
:title => '支持政大學子 羅家倫之女設立獎學金',
:tags => ['獎學金'],
:published_on => '2011-01-02'
# We may want to wrap your data in a Ruby class, and use it when storing data.
# The contract required of such a class is very simple.
#
class Article
  # Read-only accessors for the properties used throughout this example.
  attr_reader :title, :tags, :published_on

  # Keeps the given attribute Hash and mirrors every key/value pair into an
  # instance variable of the same name, which is what the readers above return.
  def initialize(attributes={})
    @attributes = attributes
    attributes.each_pair do |key, val|
      instance_variable_set("@#{key}", val)
    end
  end

  # The class must provide a `type`, `_type` or `document_type` method
  # for proper mapping; this one reports the fixed type "article".
  def type
    'article'
  end

  # The class must also provide `to_indexed_json` for conversion to JSON;
  # here it is simply the attribute Hash serialized with `to_json`.
  def to_indexed_json
    @attributes.to_json
  end
end
# Note: Since our class takes a Hash of attributes on initialization, we may even
# wrap the results in instances of this class; we'll see how to do that further below.
#
article = Article.new :title => '親身感受臺灣特色 日本田野研究團政大學習',
:tags => ['臺灣特色', '日本'],
:published_on => '2011-01-03'
# Let's store the `article`, now.
#
store article
# And let's „force refresh“ the index, so we can query it immediately.
#
refresh
end
# We may want to define a specific [mapping](http://www.elasticsearch.org/guide/reference/api/admin-indices-create-index.html)
# for the index.
Tire.index 'articles' do
# To do so, let's just pass a Hash containing the specified mapping to the `Index#create` method.
#
create :mappings => {
# Let's specify for which _type_ of documents this mapping should be used:
# „article”, in our case.
#
:article => {
:properties => {
# Let's specify the type of the field, whether it should be analyzed, ...
#
:id => { :type => 'string', :index => 'not_analyzed', :include_in_all => false },
# ... set the boost or analyzer settings for the field, etc. The _ElasticSearch_ guide
# has [more information](http://elasticsearch.org/guide/reference/mapping/index.html).
# Don't forget, that proper mapping is key to efficient and effective search.
# But don't fret about getting the mapping right the first time, you won't.
# In most cases, the default, dynamic mapping is just fine for prototyping.
#
:title => { :type => 'string', :analyzer => 'cjk', :boost => 2.0 },
:tags => { :type => 'string', :analyzer => 'keyword' },
:content => { :type => 'string', :analyzer => 'cjk' }
}
}
}
end
#### Bulk Indexing
# Of course, we may have large amounts of data, and adding them to the index one by one really isn't the best idea.
# We can use _ElasticSearch's_ [bulk API](http://www.elasticsearch.org/guide/reference/api/bulk.html)
# for importing the data.
# So, for demonstration purposes, let's suppose we have a simple collection of hashes to store.
#
articles = [
# Notice that such objects must have an `id` property!
#
{ :id => '1', :type => 'article', :title => '復興「校球」 政大男足決戰UFA足球聯賽', :tags => ['足球'], :published_on => '2011-01-01' },
# And, of course, they should contain the `type` property for the mapping to work!
#
{ :id => '2', :type => 'article', :title => '社科院舉辦碩博士班畢業生撥穗典禮', :tags => ['博士班', '畢業','社科院'], :published_on => '2011-01-02' },
{ :id => '3', :type => 'article', :title => '支持政大學子 羅家倫之女設立獎學金', :tags => ['獎學金'], :published_on => '2011-01-02' },
{ :id => '4', :type => 'article', :title => '親身感受臺灣特色 日本田野研究團政大學習', :tags => ['臺灣特色', '日本'], :published_on => '2011-01-03' }
]
# We can just push them into the index in one go.
#
Tire.index 'articles' do
import articles
end
# Of course, we can easily manipulate the documents before storing them in the index.
#
Tire.index 'articles' do
delete
# ... by passing a block to the `import` method. The collection will
# be available in the block argument.
#
import articles do |documents|
# We will capitalize every _title_ and return the manipulated collection
# back to the `import` method.
#
documents.map { |document| document.update(:title => document[:title].capitalize) }
end
refresh
end
### Searching
# With the documents indexed and stored in the _ElasticSearch_ database, we can search them, finally.
#
# _Tire_ exposes the search interface via simple domain-specific language.
#### Simple Query String Searches
# We can do simple searches, like querying the `news_bulletins` index
# with the query string `title:政大`.
#
s = Tire.search('news_bulletins') do
  query do
    string "title:政大"
  end
end
# Print the header once, before iterating, so the hit count is reported
# exactly one time -- also when the search returns no documents at all.
#
puts "Test1==============================Has results: #{s.results.count}".yellow
s.results.each do |document|
  puts "* #{ document.title } [tags: ]"
end
# Or, we can search for articles published between January, 1st and January, 2nd.
#
s = Tire.search('articles') do
  query do
    string "published_on:[2011-01-01 TO 2011-01-02]"
  end
end
# The header is printed only after `s` has been reassigned, so the count
# belongs to this search and not to the previous one.
#
puts "Test2==Or, we can search for articles published between January, 1st and January, 2nd.=Has results: #{s.results.count}".yellow
s.results.each do |document|
  puts "* #{ document.title } [published: #{document.published_on}]"
end
# Notice, that we can access local variables from the _enclosing scope_.
# (Of course, we may write the blocks in shorter notation.)
# We will define the query in a local variable named `q`...
#
# The query string is kept in a local variable of the enclosing scope.
q = "title:T*"
# ... and we can use it inside the `query` block.
#
s = Tire.search('articles') { query { string q } }
# NOTE(review): the upstream example listed "Two" and "Three" as results here.
# The articles imported above all have Chinese titles, so `title:T*`
# presumably matches nothing in this script -- confirm against a running server.
#
# Print the query string and the number of hits, then one line per document.
puts "Test3==and we can use it inside the `query` block..[ #{q} ]=Has results: #{s.results.count}".yellow
s.results.each do |document|
puts "* #{ document.title } [tags:]"
end
# Often, we need to access variables or methods defined in the _outer scope_.
# To do that, we have to use a slight variation of the DSL.
#
# Let's assume we have a plain Ruby class, named `Article`.
#
class Article
  class << self
    # The query string used by `search`, exposed as a class method.
    def q
      "title:T*"
    end

    # Wraps the _Tire_ search in a class method and returns its results.
    #
    # Both the search object and the query object are received as block
    # arguments, so `self` inside the blocks is still this class and the
    # `q` class method can be called from there.
    def search
      tire_search = Tire.search('articles') do |s|
        s.query { |builder| builder.string(q) }
      end
      tire_search.results
    end
  end
end
# We may use any valid [Lucene query syntax](http://lucene.apache.org/java/3_0_3/queryparsersyntax.html)
# for the `query_string` queries.
# For debugging our queries, we can display the JSON which is being sent to _ElasticSearch_.
#
# {"query":{"query_string":{"query":"title:T*"}}}
#
puts "", "Query:", "-"*80
puts s.to_json.green
# Or better yet, we may display a complete `curl` command to recreate the request in terminal,
# so we can see the naked response, tweak request parameters and meditate on problems.
#
# curl -X POST "http://localhost:9200/articles/_search?pretty=true" \
# -d '{"query":{"query_string":{"query":"title:T*"}}}'
#
puts "", "Try the query in Curl:", "-"*80
puts s.to_curl.green
### Logging
# For debugging more complex situations, we can enable logging, so requests and responses
# will be logged using this `curl`-friendly format.
Tire.configure do
# By default, at the _info_ level, only the `curl`-format of request and
# basic information about the response will be logged:
#
# # 2011-04-24 11:34:01:150 [CREATE] ("articles")
# #
# curl -X POST "http://localhost:9200/articles"
#
# # 2011-04-24 11:34:01:152 [200]
#
logger 'elasticsearch.log'
# For debugging, we can switch to the _debug_ level, which will log the complete JSON responses.
#
# That's very convenient if we want to post a recreation of some problem or solution
# to the mailing list, IRC channel, etc.
#
logger 'elasticsearch.log', :level => 'debug'
# Note that we can pass any [`IO`](http://www.ruby-doc.org/core/classes/IO.html)-compatible Ruby object as a logging device.
#
logger STDERR
end
### Configuration
# As we have just seen with logging, we can configure various parts of _Tire_.
#
Tire.configure do
# First of all, we can configure the URL for _ElasticSearch_.
#
url "http://search.example.com"
# Second, we may want to wrap the result items in our own class, for instance
# the `Article` class set above.
#
wrapper Article
# Finally, we can reset one or all configuration settings to their defaults.
#
reset :url
reset
end
### Complex Searching
# Query strings are convenient for simple searches, but we may want to define our queries more expressively,
# using the _ElasticSearch_ [Query DSL](http://www.elasticsearch.org/guide/reference/query-dsl/index.html).
#
s = Tire.search('articles') do
# Let's suppose we want to search for articles with specific _tags_, in our case “ruby” _or_ “python”.
#
query do
# That's a great excuse to use a [_terms_](http://elasticsearch.org/guide/reference/query-dsl/terms-query.html)
# query.
#
terms :tags, ['ruby', 'python']
end
end
# The search, as expected, returns three articles, all tagged “ruby” — among other tags:
#
# * Two [tags: ruby, python]
# * One [tags: ruby]
# * Four [tags: ruby, php]
#
puts "Test4==The search, as expected, returns three articles, all tagged “STHs” — among other tags.Has results: #{s.results.count}".yellow
s.results.each do |document|
puts "* #{ document.title } [tags: ]"
end
# What if we wanted to search for articles tagged both “ruby” _and_ “python”?
#
s = Tire.search('articles') do
query do
# That's a great excuse to specify `minimum_match` for the query.
#
terms :tags, ['ruby', 'python'], :minimum_match => 2
end
end
# The search, as expected, returns one article, tagged with _both_ “ruby” and “python”:
#
# * Two [tags: ruby, python]
#
puts "Test5==The search, as expected, returns one article, tagged with _both_ 'ruby' and 'python'.Has results: #{s.results.count}".yellow
s.results.each do |document|
puts "* #{ document.title } [tags: ]"
end
#### Boolean Queries
# Quite often, we need complex queries with boolean logic.
# Instead of composing long query strings such as `tags:ruby OR tags:java AND NOT tags:python`,
# we can use the [_bool_](http://www.elasticsearch.org/guide/reference/query-dsl/bool-query.html)
# query.
# Search the `news_bulletins` index with a `bool` query built from two clauses.
s = Tire.search('news_bulletins') do
query do
# In _Tire_, we can build `bool` queries declaratively, as usual.
boolean do
# A `should` (`OR`) clause with the query string `title:政大`,
#
should { string 'title:政大' }
# and a `must_not` (`AND NOT`) clause with the query string `title:復興`.
must_not { string 'title:復興' }
# The `must_not` clause of the upstream example is left disabled:
# must_not { string 'tags:python' }
end
end
end
# NOTE(review): the sample output of the upstream example ("One", "Three", "Four")
# described its English demo articles and does not apply to this index;
# the actual hits depend on what `news_bulletins` contains.
#
# Print the number of hits, then one line per returned document.
puts "Test6==Boolean Queries.Has results: #{s.results.count}".yellow
s.results.each do |document|
puts "* #{ document.title } [tags: ]"
end
# The best thing about `boolean` queries is that we can very easily save these partial queries as Ruby blocks,
# to mix and reuse them later, since we can call the `boolean` method multiple times.
#
# Let's define the query for the _tags_ property,
#
tags_query = lambda do |boolean|
  boolean.should { string 'tags:ruby' }
  boolean.should { string 'tags:java' }
end
# ... and a query for the _published_on_ property.
published_on_query = lambda do |boolean|
  boolean.must { string 'published_on:[2011-01-01 TO 2011-01-02]' }
end
# Now, we can use the `tags_query` on its own. This statement only builds
# the search object; its results are not read.
# (`boolean(&block)` avoids Ruby's "`&' interpreted as argument prefix" warning.)
#
Tire.search('articles') { query { boolean(&tags_query) } }
# Or, we can combine it with the `published_on` query. The search is kept
# in `s` so that the header below reports the hits of *this* search instead
# of whatever search was previously stored in `s`.
#
s = Tire.search('articles') do
  query do
    boolean(&tags_query)
    boolean(&published_on_query)
  end
end
puts "Test7== mix and reuse Boolean Queries: #{s.results.count}".yellow
s.results.each do |document|
  puts "* #{ document.title } [tags: ]"
end
# _ElasticSearch_ supports many types of [queries](http://www.elasticsearch.org/guide/reference/query-dsl/).
#
# Eventually, _Tire_ will support all of them. So far, only these are supported:
#
# * [string](http://www.elasticsearch.org/guide/reference/query-dsl/query-string-query.html)
# * [text](http://www.elasticsearch.org/guide/reference/query-dsl/text-query.html)
# * [term](http://elasticsearch.org/guide/reference/query-dsl/term-query.html)
# * [terms](http://elasticsearch.org/guide/reference/query-dsl/terms-query.html)
# * [bool](http://www.elasticsearch.org/guide/reference/query-dsl/bool-query.html)
# * [custom_score](http://www.elasticsearch.org/guide/reference/query-dsl/custom-score-query.html)
# * [fuzzy](http://www.elasticsearch.org/guide/reference/query-dsl/fuzzy-query.html)
# * [all](http://www.elasticsearch.org/guide/reference/query-dsl/match-all-query.html)
# * [ids](http://www.elasticsearch.org/guide/reference/query-dsl/ids-query.html)
puts "Topic#### Faceted Search ==> SKIP".yellow
# # _ElasticSearch_ makes it trivial to retrieve complex aggregated data from our index/database,
# # so called [_facets_](http://www.elasticsearch.org/guide/reference/api/search/facets/index.html).
# # Let's say we want to display article counts for every tag in the database.
# # For that, we'll use a _terms_ facet.
# #
# s = Tire.search 'articles' do
# # We will search for articles whose title begins with letter “T”,
# #
# query { string 'title:T*' }
# # and retrieve the counts “bucketed” by `tags`.
# #
# facet 'tags' do
# terms :tags
# end
# end
# # As we see, our query has found two articles, and if you recall our articles from above,
# # _Two_ is tagged with “ruby” and “python”, while _Three_ is tagged with “java”.
# #
# # Found 2 articles: Three, Two
# #
# # The counts shouldn't surprise us:
# #
# # Counts by tag:
# # -------------------------
# # ruby 1
# # python 1
# # java 1
# #
# puts "Found #{s.results.count} articles: #{s.results.map(&:title).join(', ')}"
# puts "Counts by tag:", "-"*25
# s.results.facets['tags']['terms'].each do |f|
# puts "#{f['term'].ljust(10)} #{f['count']}"
# end
# # These counts are based on the scope of our current query.
# # What if we wanted to display aggregated counts by `tags` across the whole database?
# #
# s = Tire.search 'articles' do
# # Let's repeat the search for “T”...
# #
# query { string 'title:T*' }
# facet 'global-tags', :global => true do
# # ...but set the `global` scope for the facet in this case.
# #
# terms :tags
# end
# # We can even _combine_ facets scoped to the current query
# # with globally scoped facets — we'll just use a different name.
# #
# facet 'current-tags' do
# terms :tags
# end
# end
# # Aggregated results for the current query are the same as previously:
# #
# # Current query facets:
# # -------------------------
# # ruby 1
# # python 1
# # java 1
# #
# puts "Current query facets:", "-"*25
# s.results.facets['current-tags']['terms'].each do |f|
# puts "#{f['term'].ljust(10)} #{f['count']}"
# end
# # On the other hand, aggregated results for the global scope include also
# # tags for articles not matched by the query, such as “java” or “php”:
# #
# # Global facets:
# # -------------------------
# # ruby 3
# # python 1
# # php 1
# # java 1
# #
# puts "Global facets:", "-"*25
# s.results.facets['global-tags']['terms'].each do |f|
# puts "#{f['term'].ljust(10)} #{f['count']}"
# end
# # _ElasticSearch_ supports many advanced types of facets, such as those for computing statistics or geographical distance.
# #
# # Eventually, _Tire_ will support all of them. So far, only these are supported:
# #
# # * [terms](http://www.elasticsearch.org/guide/reference/api/search/facets/terms-facet.html)
# # * [date](http://www.elasticsearch.org/guide/reference/api/search/facets/date-histogram-facet.html)
# # * [range](http://www.elasticsearch.org/guide/reference/api/search/facets/range-facet.html)
# # * [histogram](http://www.elasticsearch.org/guide/reference/api/search/facets/histogram-facet.html)
# # * [statistical](http://www.elasticsearch.org/guide/reference/api/search/facets/statistical-facet.html)
# # * [terms_stats](http://www.elasticsearch.org/guide/reference/api/search/facets/terms-stats-facet.html)
# # * [query](http://www.elasticsearch.org/guide/reference/api/search/facets/query-facet.html)
# # We have seen that _ElasticSearch_ facets enable us to fetch complex aggregations from our data.
# #
# # They are frequently used for another feature, „faceted navigation“.
# # We can combine query and facets with
# # [filters](http://elasticsearch.org/guide/reference/api/search/filter.html),
# # so the returned documents are restricted by certain criteria — for example to a specific category —,
# # but the aggregation calculations are still based on the original query.
# #### Filtered Search
# # So, let's make our search a bit more complex. Let's search for articles whose titles begin
# # with letter “T”, again, but filter the results, so only the articles tagged “ruby”
# # are returned.
# #
# s = Tire.search 'articles' do
# # We will use just the same **query** as before.
# #
# query { string 'title:T*' }
# # But we will add a _terms_ **filter** based on tags.
# #
# filter :terms, :tags => ['ruby']
# # And, of course, our facet definition.
# #
# facet('tags') { terms :tags }
# end
# # We see that only the article _Two_ (tagged “ruby” and “python”) is returned,
# # _not_ the article _Three_ (tagged “java”):
# #
# # * Two [tags: ruby, python]
# #
# s.results.each do |document|
# puts "* #{ document.title } [tags: ]"
# end
# # The _count_ for article _Three_'s tags, “java”, on the other hand, _is_ in fact included:
# #
# # Counts by tag:
# # -------------------------
# # ruby 1
# # python 1
# # java 1
# #
# puts "Counts by tag:", "-"*25
# s.results.facets['tags']['terms'].each do |f|
# puts "#{f['term'].ljust(10)} #{f['count']}"
# end
# #### Sorting
# # By default, the results are sorted according to their relevancy.
# #
# s = Tire.search('articles') { query { string 'tags:ruby' } }
# s.results.each do |document|
# puts "* #{ document.title } " +
# "[tags: ; " +
# # The score is available as the `_score` property.
# #
# "score: #{document._score}]"
# end
# # The results:
# #
# # * One [tags: ruby; score: 0.30685282]
# # * Four [tags: ruby, php; score: 0.19178301]
# # * Two [tags: ruby, python; score: 0.19178301]
# # But, what if we want to sort the results based on some other criteria,
# # such as published date or product price? We can do that.
# #
# s = Tire.search 'articles' do
# # We will search for articles tagged “ruby”, again, ...
# #
# query { string 'tags:ruby' }
# # ... but will sort them by their `title`, in descending order.
# #
# sort { by :title, 'desc' }
# end
# # The results:
# #
# # * Two
# # * One
# # * Four
# #
# s.results.each do |document|
# puts "* #{ document.title }"
# end
# # Of course, it's possible to combine more fields in the sorting definition.
# s = Tire.search 'articles' do
# # We will just get all articles in this case.
# #
# query { all }
# sort do
# # We will sort the results by their `published_on` property in _ascending_ order (the default),
# #
# by :published_on
# # and by their `title` property, in _descending_ order.
# #
# by :title, 'desc'
# end
# end
# # The results:
# # * One (Published on: 2011-01-01)
# # * Two (Published on: 2011-01-02)
# # * Three (Published on: 2011-01-02)
# # * Four (Published on: 2011-01-03)
# #
# s.results.each do |document|
# puts "* #{ document.title.ljust(10) } (Published on: #{ document.published_on })"
# end
# #### Highlighting
# # Often, we want to highlight the snippets matching our query in the displayed results.
# # _ElasticSearch_ provides rich
# # [highlighting](http://www.elasticsearch.org/guide/reference/api/search/highlighting.html)
# # features, and _Tire_ makes them trivial to use.
# #
# s = Tire.search 'articles' do
# # Let's search for documents containing word “Two” in their titles,
# query { string 'title:Two' }
# # and instruct _ElasticSearch_ to highlight relevant snippets.
# #
# highlight :title
# end
# # The results:
# # Title: Two; Highlighted: <em>Two</em>
# #
# s.results.each do |document|
# puts "Title: #{ document.title }; Highlighted: #{document.highlight.title}"
# end
# # We can configure many options for highlighting, such as:
# #
# s = Tire.search 'articles' do
# query { string 'title:Two' }
# # • specify the fields to highlight
# #
# highlight :title, :body
# # • specify their individual options
# #
# highlight :title, :body => { :number_of_fragments => 0 }
# # • or specify global highlighting options, such as the wrapper tag
# #
# highlight :title, :body, :options => { :tag => '<strong class="highlight">' }
# end
# #### Percolation
# # _ElasticSearch_ comes with one very interesting, and rather unique feature:
# # [_percolation_](http://www.elasticsearch.org/guide/reference/api/percolate.html).
# # It works in a „reverse search“ manner to regular search workflow of adding
# # documents to the index and then querying them.
# # Percolation allows us to register a query, and ask if a specific document
# # matches it, either on demand, or immediately as the document is being indexed.
# # Let's review an example for an index named _weather_.
# # We will register three queries for percolation against this index.
# #
# index = Tire.index('weather') do
# delete
# create
# # First, a query named _warning_,
# #
# register_percolator_query('warning', :tags => ['warning']) { string 'warning OR severe OR extreme' }
# # a query named _tsunami_,
# #
# register_percolator_query('tsunami', :tags => ['tsunami']) { string 'tsunami' }
# # and a query named _floods_.
# #
# register_percolator_query('floods', :tags => ['floods']) { string 'flood*' }
# end
# # Notice, that we have added a _tags_ field to the query document, because it behaves
# # just like any other document in _ElasticSearch_.
# # We will refresh the `_percolator` index for immediate access.
# #
# Tire.index('_percolator').refresh
# # Now, let's _percolate_ a document containing some trigger words against all registered queries.
# #
# matches = index.percolate(:message => '[Warning] Extreme flooding expected after tsunami wave.')
# # The result will contain, unsurprisingly, names of all the three registered queries:
# #
# # Matching queries: ["floods", "tsunami", "warning"]
# #
# puts "Matching queries: " + matches.inspect
# # We can filter the executed queries with a regular _ElasticSearch_ query passed as a block to
# # the `percolate` method.
# #
# matches = index.percolate(:message => '[Warning] Extreme flooding expected after tsunami wave.') do
# # Let's use a _terms_ query against the `tags` field.
# term :tags, 'tsunami'
# end
# # In this case, the result will contain only the name of the “tsunami” query.
# #
# # Matching queries: ["tsunami"]
# #
# puts "Matching queries: " + matches.inspect
# # What if we percolate another document, without the “tsunami” trigger word?
# #
# matches = index.percolate(:message => '[Warning] Extreme temperatures expected.') { term :tags, 'tsunami' }
# # As expected, we will get an empty array:
# #
# # Matching queries: []
# #
# puts "Matching queries: " + matches.inspect
# # Well, that's of course immensely useful for real-time search systems. But, there's more.
# # We can _percolate_ a document _at the same time_ it is being stored in the index,
# # getting back a list of matching queries.
# # Let's store a document with some trigger words in the index, and mark it for percolation.
# #
# response = index.store :message => '[Warning] Severe floods expected after tsunami wave.', :percolate => true
# # We will get the names of all matching queries in response.
# #
# # Matching queries: ["floods", "tsunami", "warning"]
# #
# puts "Matching queries: " + response['matches'].inspect
# # As with the _percolate_ example, we can filter the executed queries.
# #
# response = index.store :message => '[Warning] Severe floods expected after tsunami wave.',
# # Let's use a simple string query for the “tsunami” tag.
# :percolate => 'tags:tsunami'
# # Unsurprisingly, the response will contain just the name of the “tsunami” query.
# #
# # Matching queries: ["tsunami"]
# #
# puts "Matching queries: " + response['matches'].inspect
# ### ActiveModel Integration
# # As you can see, [_Tire_](https://github.com/karmi/tire) supports the
# # main features of _ElasticSearch_ in Ruby.
# #
# # It allows you to create and delete indices, add documents, search them, retrieve the facets, highlight the results,
# # and comes with a usable logging facility.
# #
# # Of course, the holy grail of any search library is easy, painless integration with your Ruby classes, and,
# # most importantly, with ActiveRecord/ActiveModel classes.
# #
# # Please, check out the [README](https://github.com/karmi/tire/tree/master#readme) file for instructions
# # how to include _Tire_-based search in your models..
# #
# # Send any feedback via Github issues, or ask questions in the [#elasticsearch](irc://irc.freenode.net/#elasticsearch) IRC channel.

View File

@ -10,12 +10,22 @@ class Panel::News::FrontEnd::NewsBulletinsController < OrbitWidgetController
def index
if !params[:search_query].blank?
@news_bulletins = NewsBulletin.solr_search do
fulltext params[:search_query] do
fields(:text,:title=>2.0)
end
end.each_hit_with_result
search_query =params[:search_query].gsub(/"/,"").split(" ")
words_query = lambda do |boolean|
search_query.each do |word|
boolean.should { string "title:#{word}" }
#boolean.should { string "sub_title:#{word}" }
#boolean.should { string "text:#{word}" }
end
end
search_result=Tire.search('news_bulletins') do
query {boolean &words_query }
#raise to_curl
end.results.collect{|t| t.id}
@news_bulletins = NewsBulletin.can_display.any_in(_id:search_result).page( params[:page_main]).per(10)
else
date_now = Time.now
if !params[:category_id].blank?

View File

@ -5,16 +5,13 @@ class NewsBulletin
include Mongoid::Timestamps
include Mongoid::MultiParameterAttributes
include Impressionist::Impressionable
# include OrbitBasis::BaseModel
# include OrbitSearchLib::ObjectSearchable
# include NccuSearch
include Tire::Model::Search
include Tire::Model::Callbacks
# include Redis::Objects
include Mongoid::BaseModel
include Sunspot::Mongoid
# scope :searchable,where(:is_checked=>true,:is_hidden=>false,:is_pending=>false)
scope :searchable,where(:is_checked=>true,:is_hidden=>false,:is_pending=>false)
is_impressionable :counter_cache => { :column_name => :view_count }
@ -64,61 +61,43 @@ class NewsBulletin
after_save :save_news_bulletin_links
after_save :save_news_bulletin_files
# include OrbitBasis::ClassMethods
searchable do
text :title do
titles = ""
titles << (self.title.zh_tw.nil? ? '':self.title.zh_tw)
titles << (self.title.en.nil? ? '':self.title.en)
#(titles << self.title.en )unless self.title.nil?
doc = Nokogiri::HTML(titles)
doc.text
#VALID_LOCALES
end
text :text do
texts = ""
texts << (self.text.zh_tw.nil? ? '':self.text.zh_tw)
texts << (self.text.en.nil? ? '':self.text.en)
#texts << self.text.en )unless self.text.en.nil?
doc = Nokogiri::HTML(texts)
doc.text
#VALID_LOCALES
end
# text :sub_titles do
# self.subtitle.zh_tw
# #VALID_LOCALES
# end
# text :text do
# self.text.zh_tw
# #VALID_LOCALES
# end
# text :content, :publish_month
# text :comments do
# comments.map(&:content)
# end
# time :published_at
# string :publish_month
end
# Tire.index 'news_bulletin' do
# delete
# create :mappings =>{
# :news_bulletin_by_title => {
# :properties=> {
# :title => { :type => 'string', :index => 'not_analyzed', :include_in_all => false },
# :body => {}
# }
# }
# :news_bulletin_by_body => {
# :properties=> {
# :body => { :type => 'string', :index => 'not_analyzed', :include_in_all => false },
# }
# }
# }
# end
# mapping do
# indexes :title, :analyzer => 'cjk', :boost => 10
# indexes :sub_titles, :analyzer => 'cjk', :boost => 5
# indexes :text, :analyzer => 'cjk'
# end
# def to_indexed_json
# titles = title.zh_tw + title.en
# sub_titles = subtitle.zh_tw + subtitle.en
# texts = text.zh_tw + text.en
# {
# :id => id,
# :title => titles,
# :sub_titles => sub_titles,
# :text => texts
# }.to_json
# end
# Returns the fixed type name of this model.
# NOTE(review): presumably read by Tire as the document type -- confirm.
def type
  'news_bulletin'
end
# Builds the JSON document that represents this record in the search index.
#
# Only the zh_tw variants of title, subtitle and text are indexed (the
# English variants are currently left out). HTML markup is stripped by
# parsing the value with Nokogiri and taking the document text.
# A missing (nil) translation is indexed as an empty string rather than
# being passed to the HTML parser.
#
# Returns a JSON String with the keys _id, title, sub_title and text.
def to_indexed_json
  # nil-safe markup stripper; non-nil values are handled exactly as before
  plain_text = lambda { |html| html.nil? ? '' : Nokogiri::HTML(html).text }
  {
    :_id => _id,
    :title => plain_text.call(title.zh_tw),         # + title.en
    :sub_title => plain_text.call(subtitle.zh_tw),  # + subtitle.en
    :text => plain_text.call(text.zh_tw)            # + text.en
  }.to_json
end
def self.search( search = nil, category_id = nil )
if category_id.to_s.size > 0 and search.to_s.size > 0