From bef071dd0b6b634c5e8e49d8c1b9daa0baa4136c Mon Sep 17 00:00:00 2001 From: Jeremy Kemper Date: Mon, 29 Oct 2007 21:39:52 +0000 Subject: [PATCH] Introduce finder :joins with associations. Same :include syntax but with inner rather than outer joins. Closes #10012. git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@8054 5ecf4fe2-1ee6-0310-87b1-e25e094e27de --- activerecord/CHANGELOG | 7 + .../lib/active_record/associations.rb | 103 ++++++++++-- activerecord/lib/active_record/base.rb | 34 +++- .../lib/active_record/calculations.rb | 19 ++- .../test/associations/ar_joins_test.rb | 158 ++++++++++++++++++ 5 files changed, 299 insertions(+), 22 deletions(-) create mode 100644 activerecord/test/associations/ar_joins_test.rb diff --git a/activerecord/CHANGELOG b/activerecord/CHANGELOG index a7f8ad522e..ecea80cc5c 100644 --- a/activerecord/CHANGELOG +++ b/activerecord/CHANGELOG @@ -1,5 +1,12 @@ *SVN* +* Introduce finder :joins with associations. Same :include syntax but with inner rather than outer joins. #10012 [RubyRedRick] + # Find users with an avatar + User.find(:all, :joins => :avatar) + + # Find posts with a high-rated comment. + Post.find(:all, :joins => :comments, :conditions => 'comments.rating > 3') + * Associations: speedup duplicate record check. #10011 [lifofifo] * Make sure that << works on has_many associations on unsaved records. Closes #9989 [hasmanyjosh] diff --git a/activerecord/lib/active_record/associations.rb b/activerecord/lib/active_record/associations.rb index f4369060f7..b250af47e8 100755 --- a/activerecord/lib/active_record/associations.rb +++ b/activerecord/lib/active_record/associations.rb @@ -486,7 +486,63 @@ def clear_association_cache #:nodoc: # # When eager loaded, conditions are interpolated in the context of the model class, not the model instance. Conditions are lazily interpolated # before the actual model exists. - # + # + # == Adding Joins For Associations to Queries Using the :joins option + # + # ActiveRecord::Base#find provides a :joins option, which takes either a string or values accepted by the :include option. + # if the value is a string, the it should contain a SQL fragment containing a join clause. + # + # Non-string values of :joins will add an automatic join clause to the query in the same way that the :include option does but with two critical + # differences: + # + # 1. A normal (inner) join will be performed instead of the outer join generated by :include. + # this means that only objects which have objects attached to the association will be included in the result. + # For example, suppose we have the following tables (in yaml format): + # + # Authors + # fred: + # id: 1 + # name: Fred + # steve: + # id: 2 + # name: Steve + # + # Contributions + # only: + # id: 1 + # author_id: 1 + # description: Atta Boy Letter for Steve + # date: 2007-10-27 14:09:54 + # + # and corresponding AR Classes + # + # class Author: < ActiveRecord::Base + # has_many :contributions + # end + # + # class Contribution < ActiveRecord::Base + # belongs_to :author + # end + # + # The query Author.find(:all) will return both authors, but Author.find(:all, :joins => :contributions) will + # only return authors who have at least one contribution, in this case only the first. + # This is only a degenerate case of the more typical use of :joins with a non-string value. + # For example to find authors who have at least one contribution before a certain date we can use: + # + # Author.find(:all, :joins => :contributions, :conditions => ["contributions.date <= ?", 1.week.ago.to_s(:db)]) + # + # 2. Only instances of the class to which the find is sent will be instantiated. ActiveRecord objects will not + # be instantiated for rows reached through the associations. + # + # The difference between using :joins vs :include to name associated records is that :joins allows associated tables to + # participate in selection criteria in the query without incurring the overhead of instantiating associated objects. + # This can be important when the number of associated objects in the database is large, and they will not be used, or + # only those associated with a paricular object or objects will be used after the query, making two queries more + # efficient than one. + # + # Note that while using a string value for :joins marks the result objects as read-only, the objects resulting + # from a call to find with a non-string :joins option value will be writable. + # # == Table Aliasing # # ActiveRecord uses table aliasing in the case that a table is referenced multiple times in a join. If a table is referenced only once, @@ -1121,7 +1177,13 @@ def association_constructor_method(constructor, reflection, association_proxy_cl def find_with_associations(options = {}) catch :invalid_query do - join_dependency = JoinDependency.new(self, merge_includes(scope(:find, :include), options[:include]), options[:joins]) + if ar_joins = scope(:find, :ar_joins) + options = options.dup + options[:ar_joins] = ar_joins + end + includes = merge_includes(scope(:find, :include), options[:include]) + includes = merge_includes(includes, options[:ar_joins]) + join_dependency = JoinDependency.new(self, includes, options[:joins], options[:ar_joins]) rows = select_all_rows(options, join_dependency) return join_dependency.instantiate(rows) end @@ -1375,8 +1437,9 @@ def create_extension_modules(association_id, block_extension, extensions) class JoinDependency # :nodoc: attr_reader :joins, :reflections, :table_aliases - def initialize(base, associations, joins) + def initialize(base, associations, joins, ar_joins = nil) @joins = [JoinBase.new(base, joins)] + @ar_joins = ar_joins @associations = associations @reflections = [] @base_records_hash = {} @@ -1400,9 +1463,9 @@ def instantiate(rows) unless @base_records_hash[primary_id] @base_records_in_order << (@base_records_hash[primary_id] = join_base.instantiate(row)) end - construct(@base_records_hash[primary_id], @associations, join_associations.dup, row) + construct(@base_records_hash[primary_id], @associations, join_associations.dup, row) unless @ar_joins end - remove_duplicate_results!(join_base.active_record, @base_records_in_order, @associations) + remove_duplicate_results!(join_base.active_record, @base_records_in_order, @associations) unless @ar_joins return @base_records_in_order end @@ -1444,7 +1507,7 @@ def build(associations, parent = nil) reflection = parent.reflections[associations.to_s.intern] or raise ConfigurationError, "Association named '#{ associations }' was not found; perhaps you misspelled it?" @reflections << reflection - @joins << JoinAssociation.new(reflection, self, parent) + @joins << (@ar_joins ? ARJoinAssociation : JoinAssociation).new(reflection, self, parent) when Array associations.each do |association| build(association, parent) @@ -1595,12 +1658,12 @@ def initialize(reflection, join_dependency, parent = nil) def association_join join = case reflection.macro when :has_and_belongs_to_many - " LEFT OUTER JOIN %s ON %s.%s = %s.%s " % [ + " #{join_type} %s ON %s.%s = %s.%s " % [ table_alias_for(options[:join_table], aliased_join_table_name), aliased_join_table_name, options[:foreign_key] || reflection.active_record.to_s.foreign_key, parent.aliased_table_name, reflection.active_record.primary_key] + - " LEFT OUTER JOIN %s ON %s.%s = %s.%s " % [ + " #{join_type} %s ON %s.%s = %s.%s " % [ table_name_and_alias, aliased_table_name, klass.primary_key, aliased_join_table_name, options[:association_foreign_key] || klass.to_s.foreign_key ] @@ -1658,13 +1721,13 @@ def association_join end end - " LEFT OUTER JOIN %s ON (%s.%s = %s.%s%s%s%s) " % [ + " #{join_type} %s ON (%s.%s = %s.%s%s%s%s) " % [ table_alias_for(through_reflection.klass.table_name, aliased_join_table_name), parent.aliased_table_name, reflection.active_record.connection.quote_column_name(parent.primary_key), aliased_join_table_name, reflection.active_record.connection.quote_column_name(jt_foreign_key), jt_as_extra, jt_source_extra, jt_sti_extra ] + - " LEFT OUTER JOIN %s ON (%s.%s = %s.%s%s) " % [ + " #{join_type} %s ON (%s.%s = %s.%s%s) " % [ table_name_and_alias, aliased_table_name, reflection.active_record.connection.quote_column_name(first_key), aliased_join_table_name, reflection.active_record.connection.quote_column_name(second_key), @@ -1672,7 +1735,7 @@ def association_join ] when reflection.options[:as] && [:has_many, :has_one].include?(reflection.macro) - " LEFT OUTER JOIN %s ON %s.%s = %s.%s AND %s.%s = %s" % [ + " #{join_type} %s ON %s.%s = %s.%s AND %s.%s = %s" % [ table_name_and_alias, aliased_table_name, "#{reflection.options[:as]}_id", parent.aliased_table_name, parent.primary_key, @@ -1681,14 +1744,14 @@ def association_join ] else foreign_key = options[:foreign_key] || reflection.active_record.name.foreign_key - " LEFT OUTER JOIN %s ON %s.%s = %s.%s " % [ + " #{join_type} %s ON %s.%s = %s.%s " % [ table_name_and_alias, aliased_table_name, foreign_key, parent.aliased_table_name, parent.primary_key ] end when :belongs_to - " LEFT OUTER JOIN %s ON %s.%s = %s.%s " % [ + " #{join_type} %s ON %s.%s = %s.%s " % [ table_name_and_alias, aliased_table_name, reflection.klass.primary_key, parent.aliased_table_name, options[:foreign_key] || klass.to_s.foreign_key ] @@ -1723,7 +1786,19 @@ def table_name_and_alias def interpolate_sql(sql) instance_eval("%@#{sql.gsub('@', '\@')}@") - end + end + + private + def join_type + "LEFT OUTER JOIN" + end + + end + class ARJoinAssociation < JoinAssociation + private + def join_type + "INNER JOIN" + end end end end diff --git a/activerecord/lib/active_record/base.rb b/activerecord/lib/active_record/base.rb index 386f4912e4..20c1f862b2 100755 --- a/activerecord/lib/active_record/base.rb +++ b/activerecord/lib/active_record/base.rb @@ -380,9 +380,11 @@ class << self # Class methods # * :group: An attribute name by which the result should be grouped. Uses the GROUP BY SQL-clause. # * :limit: An integer determining the limit on the number of rows that should be returned. # * :offset: An integer determining the offset from where the rows should be fetched. So at 5, it would skip rows 0 through 4. - # * :joins: An SQL fragment for additional joins like "LEFT JOIN comments ON comments.post_id = id". (Rarely needed). - # The records will be returned read-only since they will have attributes that do not correspond to the table's columns. + # * :joins: Either an SQL fragment for additional joins like "LEFT JOIN comments ON comments.post_id = id". (Rarely needed). + # or names associations in the same form used for the :include option. + # If the value is a string, then the records will be returned read-only since they will have attributes that do not correspond to the table's columns. # Pass :readonly => false to override. + # See adding joins for associations under Association. # * :include: Names associations that should be loaded alongside using LEFT OUTER JOINs. The symbols named refer # to already defined associations. See eager loading under Associations. # * :select: By default, this is * as in SELECT * FROM, but can be changed if you for example want to do a join, but not @@ -428,8 +430,17 @@ class << self # Class methods # end def find(*args) options = args.extract_options! + # Note: we extract any :joins option with a non-string value from the options, and turn it into + # an internal option :ar_joins. This allows code called from her to find the ar_joins, and + # it bypasses marking the result as read_only. + # A normal string join marks the result as read-only because it contains attributes from joined tables + # which are not in the base table and therefore prevent the result from being saved. + # In the case of an ar_join, the JoinDependency created to instantiate the results eliminates these + # bogus attributes. See JoinDependency#instantiate, and JoinBase#instantiate in associations.rb. + options, ar_joins = *extract_ar_join_from_options(options) validate_find_options(options) set_readonly_option!(options) + options[:ar_joins] = ar_joins if ar_joins case args.first when :first then find_initial(options) @@ -1020,8 +1031,17 @@ def find_initial(options) find_every(options).first end + # If options contains :joins, with a non-string value + # remove it from options + # return the updated or unchanged options, and the ar_join value or nil + def extract_ar_join_from_options(options) + new_options = options.dup + join_option = new_options.delete(:joins) + (join_option && !join_option.kind_of?(String)) ? [new_options, join_option] : [options, nil] + end + def find_every(options) - records = scoped?(:find, :include) || options[:include] ? + records = scoped?(:find, :include) || options[:include] || scoped?(:find, :ar_joins) || (options[:ar_joins]) ? find_with_associations(options) : find_by_sql(construct_finder_sql(options)) @@ -1445,7 +1465,13 @@ def with_scope(method_scoping = {}, action = :merge, &block) if f = method_scoping[:find] f.assert_valid_keys(VALID_FIND_OPTIONS) + # see note about :joins and :ar_joins in ActiveRecord::Base#find + f, ar_joins = *extract_ar_join_from_options(f) set_readonly_option! f + if ar_joins + f[:ar_joins] = ar_joins + method_scoping[:find] = f + end end # Merge scopings @@ -1458,7 +1484,7 @@ def with_scope(method_scoping = {}, action = :merge, &block) merge = hash[method][key] && params[key] # merge if both scopes have the same key if key == :conditions && merge hash[method][key] = [params[key], hash[method][key]].collect{ |sql| "( %s )" % sanitize_sql(sql) }.join(" AND ") - elsif key == :include && merge + elsif ([:include, :ar_joins].include?(key)) && merge hash[method][key] = merge_includes(hash[method][key], params[key]).uniq else hash[method][key] = hash[method][key] || params[key] diff --git a/activerecord/lib/active_record/calculations.rb b/activerecord/lib/active_record/calculations.rb index b2d6cef3bc..2327862f13 100644 --- a/activerecord/lib/active_record/calculations.rb +++ b/activerecord/lib/active_record/calculations.rb @@ -15,8 +15,11 @@ module ClassMethods # The third approach, count using options, accepts an option hash as the only parameter. The options are: # # * :conditions: An SQL fragment like "administrator = 1" or [ "user_name = ?", username ]. See conditions in the intro. - # * :joins: An SQL fragment for additional joins like "LEFT JOIN comments ON comments.post_id = id". (Rarely needed). - # The records will be returned read-only since they will have attributes that do not correspond to the table's columns. + # * :joins: Either an SQL fragment for additional joins like "LEFT JOIN comments ON comments.post_id = id". (Rarely needed). + # or names associations in the same form used for the :include option. + # If the value is a string, then the records will be returned read-only since they will have attributes that do not correspond to the table's columns. + # Pass :readonly => false to override. + # See adding joins for associations under Association. # * :include: Named associations that should be loaded alongside using LEFT OUTER JOINs. The symbols named refer # to already defined associations. When using named associations count returns the number DISTINCT items for the model you're counting. # See eager loading under Associations. @@ -109,7 +112,9 @@ def sum(column_name, options = {}) # Person.minimum(:age, :conditions => ['last_name != ?', 'Drake']) # Selects the minimum age for everyone with a last name other than 'Drake' # Person.minimum(:age, :having => 'min(age) > 17', :group => :last_name) # Selects the minimum age for any family without any minors def calculate(operation, column_name, options = {}) + options, ar_joins = *extract_ar_join_from_options(options) validate_calculation_options(operation, options) + options[:ar_joins] = ar_joins if ar_joins column_name = options[:select] if options[:select] column_name = '*' if column_name == :all column = column_for column_name @@ -149,8 +154,14 @@ def construct_calculation_sql(operation, column_name, options) #:nodoc: operation = operation.to_s.downcase options = options.symbolize_keys - scope = scope(:find) + scope = scope(:find) + if scope && scope[:ar_joins] + scope = scope.dup + options = options.dup + options[:ar_joins] = scope.delete(:ar_joins) + end merged_includes = merge_includes(scope ? scope[:include] : [], options[:include]) + merged_includes = merge_includes(merged_includes, options[:ar_joins]) aggregate_alias = column_alias_for(operation, column_name) if operation == 'count' @@ -173,7 +184,7 @@ def construct_calculation_sql(operation, column_name, options) #:nodoc: sql << " FROM (SELECT DISTINCT #{column_name}" if use_workaround sql << " FROM #{table_name} " if merged_includes.any? - join_dependency = ActiveRecord::Associations::ClassMethods::JoinDependency.new(self, merged_includes, options[:joins]) + join_dependency = ActiveRecord::Associations::ClassMethods::JoinDependency.new(self, merged_includes, options[:joins], options[:ar_joins]) sql << join_dependency.join_associations.collect{|join| join.association_join }.join end add_joins!(sql, options, scope) diff --git a/activerecord/test/associations/ar_joins_test.rb b/activerecord/test/associations/ar_joins_test.rb new file mode 100644 index 0000000000..e6a9180fae --- /dev/null +++ b/activerecord/test/associations/ar_joins_test.rb @@ -0,0 +1,158 @@ +require 'abstract_unit' +require 'fixtures/post' +require 'fixtures/comment' +require 'fixtures/author' +require 'fixtures/category' +require 'fixtures/categorization' +require 'fixtures/company' +require 'fixtures/topic' +require 'fixtures/reply' +require 'fixtures/developer' +require 'fixtures/project' + +class ArJoinsTest < Test::Unit::TestCase + fixtures :authors, :posts, :comments, :categories, :categories_posts, :people, + :developers, :projects, :developers_projects, + :categorizations, :companies, :accounts, :topics + + def test_ar_joins + authors = Author.find(:all, :joins => :posts, :conditions => ['posts.type = ?', "Post"]) + assert_not_equal(0 , authors.length) + authors.each do |author| + assert !(author.send(:instance_variables).include? "@posts") + assert(!author.readonly?, "non-string join value produced read-only result.") + end + end + + def test_ar_joins_with_cascaded_two_levels + authors = Author.find(:all, :joins=>{:posts=>:comments}) + assert_equal(2, authors.length) + authors.each do |author| + assert !(author.send(:instance_variables).include? "@posts") + assert(!author.readonly?, "non-string join value produced read-only result.") + end + authors = Author.find(:all, :joins=>{:posts=>:comments}, :conditions => ["comments.body = ?", "go crazy" ]) + assert_equal(1, authors.length) + authors.each do |author| + assert !(author.send(:instance_variables).include? "@posts") + assert(!author.readonly?, "non-string join value produced read-only result.") + end + end + + + def test_ar_joins_with_complex_conditions + authors = Author.find(:all, :joins=>{:posts=>[:comments, :categories]}, + :conditions => ["categories.name = ? AND posts.title = ?", "General", "So I was thinking"] + ) + assert_equal(1, authors.length) + authors.each do |author| + assert !(author.send(:instance_variables).include? "@posts") + assert(!author.readonly?, "non-string join value produced read-only result.") + end + assert_equal("David", authors.first.name) + end + + def test_ar_join_with_has_many_and_limit_and_scoped_and_explicit_conditions + Post.with_scope(:find => { :conditions => "1=1" }) do + posts = authors(:david).posts.find(:all, + :joins => :comments, + :conditions => "comments.body like 'Normal%' OR comments.#{QUOTED_TYPE}= 'SpecialComment'", + :limit => 2 + ) + assert_equal 2, posts.size + + count = Post.count( + :joins => [ :comments, :author ], + :conditions => "authors.name = 'David' AND (comments.body like 'Normal%' OR comments.#{QUOTED_TYPE}= 'SpecialComment')", + :limit => 2 + ) + assert_equal count, posts.size + end + end + + def test_ar_join_with_scoped_order_using_association_limiting_without_explicit_scope + posts_with_explicit_order = Post.find(:all, :conditions => 'comments.id is not null', :joins => :comments, :order => 'posts.id DESC', :limit => 2) + posts_with_scoped_order = Post.with_scope(:find => {:order => 'posts.id DESC'}) do + Post.find(:all, :conditions => 'comments.id is not null', :joins => :comments, :limit => 2) + end + assert_equal posts_with_explicit_order, posts_with_scoped_order + end + + def test_scoped_find_include + # with the include, will retrieve only developers for the given project + scoped_developers = Developer.with_scope(:find => { :joins => :projects }) do + Developer.find(:all, :conditions => 'projects.id = 2') + end + assert scoped_developers.include?(developers(:david)) + assert !scoped_developers.include?(developers(:jamis)) + assert_equal 1, scoped_developers.size + end + + + def test_nested_scoped_find_ar_join + Developer.with_scope(:find => { :joins => :projects }) do + Developer.with_scope(:find => { :conditions => "projects.id = 2" }) do + assert_equal('David', Developer.find(:first).name) + end + end + end + + def test_nested_scoped_find_merged_ar_join + # :include's remain unique and don't "double up" when merging + Developer.with_scope(:find => { :joins => :projects, :conditions => "projects.id = 2" }) do + Developer.with_scope(:find => { :joins => :projects }) do + assert_equal 1, Developer.instance_eval('current_scoped_methods')[:find][:ar_joins].length + assert_equal('David', Developer.find(:first).name) + end + end + # the nested scope doesn't remove the first :include + Developer.with_scope(:find => { :joins => :projects, :conditions => "projects.id = 2" }) do + Developer.with_scope(:find => { :joins => [] }) do + assert_equal 1, Developer.instance_eval('current_scoped_methods')[:find][:ar_joins].length + assert_equal('David', Developer.find(:first).name) + end + end + # mixing array and symbol include's will merge correctly + Developer.with_scope(:find => { :joins => [:projects], :conditions => "projects.id = 2" }) do + Developer.with_scope(:find => { :joins => :projects }) do + assert_equal 1, Developer.instance_eval('current_scoped_methods')[:find][:ar_joins].length + assert_equal('David', Developer.find(:first).name) + end + end + end + + def test_nested_scoped_find_replace_include + Developer.with_scope(:find => { :joins => :projects }) do + Developer.with_exclusive_scope(:find => { :joins => [] }) do + assert_equal 0, Developer.instance_eval('current_scoped_methods')[:find][:ar_joins].length + end + end + end + +# +# Calculations +# + def test_count_with_ar_joins + assert_equal(2, Author.count(:joins => :posts, :conditions => ['posts.type = ?', "Post"])) + assert_equal(1, Author.count(:joins => :posts, :conditions => ['posts.type = ?', "SpecialPost"])) + end + + def test_should_get_maximum_of_field_with_joins + assert_equal 50, Account.maximum(:credit_limit, :joins=> :firm, :conditions => "companies.name != 'Summit'") + end + + def test_should_get_maximum_of_field_with_scoped_include + Account.with_scope :find => { :joins => :firm, :conditions => "companies.name != 'Summit'" } do + assert_equal 50, Account.maximum(:credit_limit) + end + end + + def test_should_not_modify_options_when_using_ar_joins_on_count + options = {:conditions => 'companies.id > 1', :joins => :firm} + options_copy = options.dup + + Account.count(:all, options) + assert_equal options_copy, options + end + +end -- GitLab