Pastie now auto-senses if line-wrap is a bad or good idea. Feedback?
## mark a section (Learn more)
Index: /Users/jney/Documents/repos/benchmark/search2/app/models/book.rb =================================================================== --- /Users/jney/Documents/repos/benchmark/search2/app/models/book.rb (révision 1755) +++ /Users/jney/Documents/repos/benchmark/search2/app/models/book.rb (copie de travail) @@ -1,13 +1,26 @@ class Book < ActiveRecord::Base has_many :verses - acts_as_ferret :fields => ['name', 'content'], - :remote => true, - :store_class_name => true, - :analyzer => Ferret::Analysis::StandardAnalyzer.new([]) # No stop words - - acts_as_solr :fields => ['name', 'content'] + # acts_as_ferret :fields => ['name', 'content'], + # :remote => true, + # :store_class_name => true, + # :analyzer => Ferret::Analysis::StandardAnalyzer.new([]) # No stop words + # + # acts_as_solr :fields => ['name', 'content'] + acts_as_tsearch :vectors => { + :fields => { + "a" => {:columns => ["books.name"]}, + "b" => {:columns => ["verses.content"]} + }, + :tables => { + :contents => { + :from => "verses", + :where => "verses.book_id = books.id" + } + } + } + is_indexed :fields => ['name'], :concatenate => [{:association_name => 'verses', :field => 'content', :as => 'content'}] Index: /Users/jney/Documents/repos/benchmark/search2/app/models/verse.rb =================================================================== --- /Users/jney/Documents/repos/benchmark/search2/app/models/verse.rb (révision 1755) +++ /Users/jney/Documents/repos/benchmark/search2/app/models/verse.rb (copie de travail) @@ -1,13 +1,15 @@ class Verse < ActiveRecord::Base belongs_to :book - acts_as_ferret :fields => ['book_id', 'chapter_id', 'number', 'content'], - :remote => true, - :store_class_name => true, - :analyzer => Ferret::Analysis::StandardAnalyzer.new([]) # No stop words + # acts_as_ferret :fields => ['book_id', 'chapter_id', 'number', 'content'], + # :remote => true, + # :store_class_name => true, + # :analyzer => Ferret::Analysis::StandardAnalyzer.new([]) # No stop words + # + # acts_as_solr :fields => ['book_id', 'chapter_id', 'number', 'content'] - acts_as_solr :fields => ['book_id', 'chapter_id', 'number', 'content'] - + acts_as_tsearch :fields => ['content'] + is_indexed :fields => ['book_id', 'chapter_id', 'number', 'content'] end Index: /Users/jney/Documents/repos/benchmark/search2/db/migrate/001_create_verses.rb =================================================================== --- /Users/jney/Documents/repos/benchmark/search2/db/migrate/001_create_verses.rb (révision 1755) +++ /Users/jney/Documents/repos/benchmark/search2/db/migrate/001_create_verses.rb (copie de travail) @@ -1,7 +1,22 @@ class CreateVerses < ActiveRecord::Migration def self.up - db = ActiveRecord::Base.connection.instance_variable_get('@config')[:database] - system("mysql -u root #{db} < #{RAILS_ROOT}/db/avkjv.sql") + create_table :verses, :force => true do |t| + t.column "book_id", :integer + t.column "chapter_id", :integer + t.column "number", :integer + t.column "content", :text + # column needed for tsearch + t.column "vectors", :tsvector + end + create_table :books, :force => true do |t| + t.column "name", :string, :null => false, :default => '' + t.column "chapter_count", :integer + t.column "grade", :numeric + # column needed for tsearch + t.column "vectors", :tsvector + end + db = ActiveRecord::Base.connection.instance_variable_get('@config') + system("psql -U #{db[:username]} #{db[:database]} -h #{db[:host]} < #{RAILS_ROOT}/db/avkjv.sql") end def self.down Index: /Users/jney/Documents/repos/benchmark/search2/lib/tasks/benchmark.rake =================================================================== --- /Users/jney/Documents/repos/benchmark/search2/lib/tasks/benchmark.rake (révision 1753) +++ /Users/jney/Documents/repos/benchmark/search2/lib/tasks/benchmark.rake (copie de travail) @@ -16,7 +16,7 @@ end counts = [] - Benchmark.bm(25) do |x| + Benchmark.bm(20) do |x| x.report "reindex" do # Call indexer directly in order to avoid the overhead of starting Ruby, since the other indexers execute # in the current Ruby process even though they would have their own startup overhead in a production @@ -69,7 +69,7 @@ end counts = [] - Benchmark.bm(25) do |x| + Benchmark.bm(20) do |x| x.report "reindex" do Verse.rebuild_index Book.rebuild_index @@ -116,7 +116,7 @@ end counts = [] - Benchmark.bm(25) do |x| + Benchmark.bm(20) do |x| x.report "reindex" do Verse.rebuild_solr_index(4000) Book.rebuild_solr_index(4000) @@ -153,6 +153,52 @@ puts "index size: #{`du -ch #{RAILS_ROOT}/vendor/plugins/acts_as_solr/solr/solr/data/development/index | grep total`}" memory("#{RAILS_ROOT}/vendor/plugins/acts_as_solr/solr/tmp/development_pid") end + + task :tsearch do + puts "\nTsearch" + counts = [] + + Benchmark.bm(20) do |x| + x.report "reindex" do + Verse.update_vectors + Book.update_vectors + end if ENV['INDEX'] + + counts << Verse.count_by_tsearch("God") + x.report "verse:god" do + TIMES.times { Verse.find_by_tsearch("God", :limit => 10, :offset => 20) } + end + + counts << Book.count_by_tsearch("God") + x.report "book:god" do + TIMES.times { Book.find_by_tsearch("God", :limit => 10) } + end + + counts << Verse.count_by_tsearch("God", :joins => 'JOIN books ON verses.book_id = books.id') + x.report "all:god" do + TIMES.times { Verse.find_by_tsearch("God", :limit => 10, :offset => 20, :joins => 'JOIN books ON verses.book_id = books.id') } + end + + counts << Verse.count_by_tsearch("molten calves", :limit => 10) + x.report "all:calves" do + TIMES.times { Verse.find_by_tsearch("molten calves", :limit => 10) } + end + + counts << Verse.count_by_tsearch("Moreover he said unto me, Son of man, eat that thou findest", :limit => 10) + x.report "all:moreover" do + TIMES.times { Verse.find_by_tsearch("Moreover he said unto me, Son of man, eat that thou findest", :limit => 10) } + end + + end + + puts "result counts: #{counts.inspect}" + puts "index size: #{`du -ch #{RAILS_ROOT}/index/ | grep total`}" + # no memory log for tsearch + # memory("#{RAILS_ROOT}/log/ferret.pid") + end + + task :pg => ['benchmark:sphinx', 'benchmark:tsearch'] do + end end task :benchmark => ['benchmark:sphinx', 'benchmark:solr', 'benchmark:ferret'] do
This paste will be private.
From the Design Piracy series on my blog: