提交 4797dc34 编写于 作者: W wangxiaohui.neo

init commit

上级
---
BasedOnStyle: Google
---
Language: Cpp
ColumnLimit: 80
test_case/
此差异已折叠。
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>GIT_CUB</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
</natures>
</projectDescription>
eclipse.preferences.version=1
org.eclipse.cdt.codan.checkers.errnoreturn=Warning
org.eclipse.cdt.codan.checkers.errnoreturn.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},implicit\=>false}
org.eclipse.cdt.codan.checkers.errreturnvalue=Error
org.eclipse.cdt.codan.checkers.errreturnvalue.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.checkers.nocommentinside=-Error
org.eclipse.cdt.codan.checkers.nocommentinside.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.checkers.nolinecomment=-Error
org.eclipse.cdt.codan.checkers.nolinecomment.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.checkers.noreturn=Error
org.eclipse.cdt.codan.checkers.noreturn.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},implicit\=>false}
org.eclipse.cdt.codan.internal.checkers.AbstractClassCreation=Error
org.eclipse.cdt.codan.internal.checkers.AbstractClassCreation.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.AmbiguousProblem=Error
org.eclipse.cdt.codan.internal.checkers.AmbiguousProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.AssignmentInConditionProblem=Warning
org.eclipse.cdt.codan.internal.checkers.AssignmentInConditionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.AssignmentToItselfProblem=Error
org.eclipse.cdt.codan.internal.checkers.AssignmentToItselfProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.CaseBreakProblem=Warning
org.eclipse.cdt.codan.internal.checkers.CaseBreakProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},no_break_comment\=>"no break",last_case_param\=>true,empty_case_param\=>false}
org.eclipse.cdt.codan.internal.checkers.CatchByReference=Warning
org.eclipse.cdt.codan.internal.checkers.CatchByReference.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},unknown\=>false,exceptions\=>()}
org.eclipse.cdt.codan.internal.checkers.CircularReferenceProblem=Error
org.eclipse.cdt.codan.internal.checkers.CircularReferenceProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.ClassMembersInitialization=Warning
org.eclipse.cdt.codan.internal.checkers.ClassMembersInitialization.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},skip\=>true}
org.eclipse.cdt.codan.internal.checkers.FieldResolutionProblem=Error
org.eclipse.cdt.codan.internal.checkers.FieldResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.FunctionResolutionProblem=Error
org.eclipse.cdt.codan.internal.checkers.FunctionResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.InvalidArguments=Error
org.eclipse.cdt.codan.internal.checkers.InvalidArguments.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.InvalidTemplateArgumentsProblem=Error
org.eclipse.cdt.codan.internal.checkers.InvalidTemplateArgumentsProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.LabelStatementNotFoundProblem=Error
org.eclipse.cdt.codan.internal.checkers.LabelStatementNotFoundProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.MemberDeclarationNotFoundProblem=Error
org.eclipse.cdt.codan.internal.checkers.MemberDeclarationNotFoundProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.MethodResolutionProblem=Error
org.eclipse.cdt.codan.internal.checkers.MethodResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.NamingConventionFunctionChecker=-Info
org.eclipse.cdt.codan.internal.checkers.NamingConventionFunctionChecker.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},pattern\=>"^[a-z]",macro\=>true,exceptions\=>()}
org.eclipse.cdt.codan.internal.checkers.NonVirtualDestructorProblem=Warning
org.eclipse.cdt.codan.internal.checkers.NonVirtualDestructorProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.OverloadProblem=Error
org.eclipse.cdt.codan.internal.checkers.OverloadProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.RedeclarationProblem=Error
org.eclipse.cdt.codan.internal.checkers.RedeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.RedefinitionProblem=Error
org.eclipse.cdt.codan.internal.checkers.RedefinitionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.ReturnStyleProblem=-Warning
org.eclipse.cdt.codan.internal.checkers.ReturnStyleProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.ScanfFormatStringSecurityProblem=-Warning
org.eclipse.cdt.codan.internal.checkers.ScanfFormatStringSecurityProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.StatementHasNoEffectProblem=Warning
org.eclipse.cdt.codan.internal.checkers.StatementHasNoEffectProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true,exceptions\=>()}
org.eclipse.cdt.codan.internal.checkers.SuggestedParenthesisProblem=Warning
org.eclipse.cdt.codan.internal.checkers.SuggestedParenthesisProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},paramNot\=>false}
org.eclipse.cdt.codan.internal.checkers.SuspiciousSemicolonProblem=Warning
org.eclipse.cdt.codan.internal.checkers.SuspiciousSemicolonProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},else\=>false,afterelse\=>false}
org.eclipse.cdt.codan.internal.checkers.TypeResolutionProblem=Error
org.eclipse.cdt.codan.internal.checkers.TypeResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
org.eclipse.cdt.codan.internal.checkers.UnusedFunctionDeclarationProblem=Warning
org.eclipse.cdt.codan.internal.checkers.UnusedFunctionDeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true}
org.eclipse.cdt.codan.internal.checkers.UnusedStaticFunctionProblem=Warning
org.eclipse.cdt.codan.internal.checkers.UnusedStaticFunctionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true}
org.eclipse.cdt.codan.internal.checkers.UnusedVariableDeclarationProblem=Warning
org.eclipse.cdt.codan.internal.checkers.UnusedVariableDeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true,exceptions\=>("@(\#)","$Id")}
org.eclipse.cdt.codan.internal.checkers.VariableResolutionProblem=Error
org.eclipse.cdt.codan.internal.checkers.VariableResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}}
useParentScope=false
eclipse.preferences.version=1
indexer/indexAllFiles=true
indexer/indexAllHeaderVersions=false
indexer/indexAllVersionsSpecificHeaders=
indexer/indexOnOpen=false
indexer/indexUnusedHeadersWithAlternateLang=false
indexer/indexUnusedHeadersWithDefaultLang=true
indexer/indexerId=org.eclipse.cdt.core.fastIndexer
indexer/skipFilesLargerThanMB=8
indexer/skipImplicitReferences=false
indexer/skipIncludedFilesLargerThanMB=16
indexer/skipMacroReferences=false
indexer/skipReferences=false
indexer/skipTypeReferences=false
indexer/useHeuristicIncludeResolution=true
org.eclipse.cdt.core.formatter.alignment_for_arguments_in_method_invocation=16
org.eclipse.cdt.core.formatter.alignment_for_assignment=16
org.eclipse.cdt.core.formatter.alignment_for_base_clause_in_type_declaration=48
org.eclipse.cdt.core.formatter.alignment_for_binary_expression=16
org.eclipse.cdt.core.formatter.alignment_for_compact_if=0
org.eclipse.cdt.core.formatter.alignment_for_conditional_expression=48
org.eclipse.cdt.core.formatter.alignment_for_conditional_expression_chain=18
org.eclipse.cdt.core.formatter.alignment_for_constructor_initializer_list=0
org.eclipse.cdt.core.formatter.alignment_for_declarator_list=16
org.eclipse.cdt.core.formatter.alignment_for_enumerator_list=48
org.eclipse.cdt.core.formatter.alignment_for_expression_list=0
org.eclipse.cdt.core.formatter.alignment_for_expressions_in_array_initializer=16
org.eclipse.cdt.core.formatter.alignment_for_member_access=0
org.eclipse.cdt.core.formatter.alignment_for_overloaded_left_shift_chain=16
org.eclipse.cdt.core.formatter.alignment_for_parameters_in_method_declaration=48
org.eclipse.cdt.core.formatter.alignment_for_throws_clause_in_method_declaration=48
org.eclipse.cdt.core.formatter.brace_position_for_array_initializer=next_line
org.eclipse.cdt.core.formatter.brace_position_for_block=next_line
org.eclipse.cdt.core.formatter.brace_position_for_block_in_case=end_of_line
org.eclipse.cdt.core.formatter.brace_position_for_method_declaration=next_line
org.eclipse.cdt.core.formatter.brace_position_for_namespace_declaration=end_of_line
org.eclipse.cdt.core.formatter.brace_position_for_switch=end_of_line
org.eclipse.cdt.core.formatter.brace_position_for_type_declaration=next_line
org.eclipse.cdt.core.formatter.comment.min_distance_between_code_and_line_comment=1
org.eclipse.cdt.core.formatter.comment.never_indent_line_comments_on_first_column=true
org.eclipse.cdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=true
org.eclipse.cdt.core.formatter.compact_else_if=true
org.eclipse.cdt.core.formatter.continuation_indentation=1
org.eclipse.cdt.core.formatter.continuation_indentation_for_array_initializer=1
org.eclipse.cdt.core.formatter.format_guardian_clause_on_one_line=false
org.eclipse.cdt.core.formatter.indent_access_specifier_compare_to_type_header=false
org.eclipse.cdt.core.formatter.indent_access_specifier_extra_spaces=0
org.eclipse.cdt.core.formatter.indent_body_declarations_compare_to_access_specifier=true
org.eclipse.cdt.core.formatter.indent_body_declarations_compare_to_namespace_header=false
org.eclipse.cdt.core.formatter.indent_breaks_compare_to_cases=true
org.eclipse.cdt.core.formatter.indent_declaration_compare_to_template_header=false
org.eclipse.cdt.core.formatter.indent_empty_lines=false
org.eclipse.cdt.core.formatter.indent_statements_compare_to_block=true
org.eclipse.cdt.core.formatter.indent_statements_compare_to_body=true
org.eclipse.cdt.core.formatter.indent_switchstatements_compare_to_cases=true
org.eclipse.cdt.core.formatter.indent_switchstatements_compare_to_switch=false
org.eclipse.cdt.core.formatter.indentation.size=4
org.eclipse.cdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_after_template_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_before_catch_in_try_statement=insert
org.eclipse.cdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_before_colon_in_constructor_initializer_list=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_before_else_in_if_statement=insert
org.eclipse.cdt.core.formatter.insert_new_line_before_identifier_in_function_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
org.eclipse.cdt.core.formatter.insert_new_line_in_empty_block=insert
org.eclipse.cdt.core.formatter.insert_space_after_assignment_operator=insert
org.eclipse.cdt.core.formatter.insert_space_after_binary_operator=insert
org.eclipse.cdt.core.formatter.insert_space_after_closing_angle_bracket_in_template_arguments=insert
org.eclipse.cdt.core.formatter.insert_space_after_closing_angle_bracket_in_template_parameters=insert
org.eclipse.cdt.core.formatter.insert_space_after_closing_brace_in_block=insert
org.eclipse.cdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
org.eclipse.cdt.core.formatter.insert_space_after_colon_in_base_clause=insert
org.eclipse.cdt.core.formatter.insert_space_after_colon_in_case=insert
org.eclipse.cdt.core.formatter.insert_space_after_colon_in_conditional=insert
org.eclipse.cdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_base_types=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_declarator_list=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_expression_list=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_template_arguments=insert
org.eclipse.cdt.core.formatter.insert_space_after_comma_in_template_parameters=insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_angle_bracket_in_template_arguments=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_angle_bracket_in_template_parameters=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_bracket=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_exception_specification=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_postfix_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_prefix_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_after_question_in_conditional=insert
org.eclipse.cdt.core.formatter.insert_space_after_semicolon_in_for=insert
org.eclipse.cdt.core.formatter.insert_space_after_unary_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_assignment_operator=insert
org.eclipse.cdt.core.formatter.insert_space_before_binary_operator=insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_angle_bracket_in_template_arguments=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_angle_bracket_in_template_parameters=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_bracket=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_exception_specification=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_colon_in_base_clause=insert
org.eclipse.cdt.core.formatter.insert_space_before_colon_in_case=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_colon_in_conditional=insert
org.eclipse.cdt.core.formatter.insert_space_before_colon_in_default=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_base_types=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_declarator_list=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_expression_list=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_template_arguments=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_comma_in_template_parameters=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_angle_bracket_in_template_arguments=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_angle_bracket_in_template_parameters=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_block=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_namespace_declaration=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_bracket=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_exception_specification=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_for=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_if=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_while=insert
org.eclipse.cdt.core.formatter.insert_space_before_postfix_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_prefix_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_question_in_conditional=insert
org.eclipse.cdt.core.formatter.insert_space_before_semicolon=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
org.eclipse.cdt.core.formatter.insert_space_before_unary_operator=do not insert
org.eclipse.cdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
org.eclipse.cdt.core.formatter.insert_space_between_empty_brackets=do not insert
org.eclipse.cdt.core.formatter.insert_space_between_empty_parens_in_exception_specification=do not insert
org.eclipse.cdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
org.eclipse.cdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
org.eclipse.cdt.core.formatter.join_wrapped_lines=true
org.eclipse.cdt.core.formatter.keep_else_statement_on_same_line=false
org.eclipse.cdt.core.formatter.keep_empty_array_initializer_on_one_line=false
org.eclipse.cdt.core.formatter.keep_imple_if_on_one_line=true
org.eclipse.cdt.core.formatter.keep_then_statement_on_same_line=false
org.eclipse.cdt.core.formatter.lineSplit=80
org.eclipse.cdt.core.formatter.number_of_empty_lines_to_preserve=1
org.eclipse.cdt.core.formatter.put_empty_statement_on_new_line=true
org.eclipse.cdt.core.formatter.tabulation.char=space
org.eclipse.cdt.core.formatter.tabulation.size=4
org.eclipse.cdt.core.formatter.use_tabs_only_for_leading_indentations=false
eclipse.preferences.version=1
formatter_profile=_B40C
formatter_settings_version=1
content-types/enabled=true
content-types/org.eclipse.cdt.core.cxxHeader/file-extensions=cuh
content-types/org.eclipse.cdt.core.cxxSource/file-extensions=cu
eclipse.preferences.version=1
此差异已折叠。
Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the NVIDIA CORPORATION nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
<hr>
<h3>About CUB</h3>
Current release: v1.8.0 (02/16/2018)
We recommend the [CUB Project Website](http://nvlabs.github.com/cub) for further information and examples.
CUB provides state-of-the-art, reusable software components for every layer
of the CUDA programming model:
- [<b><em>Device-wide primitives</em></b>] (https://nvlabs.github.com/cub/group___device_module.html)
- Sort, prefix scan, reduction, histogram, etc.
- Compatible with CUDA dynamic parallelism
- [<b><em>Block-wide "collective" primitives</em></b>] (https://nvlabs.github.com/cub/group___block_module.html)
- I/O, sort, prefix scan, reduction, histogram, etc.
- Compatible with arbitrary thread block sizes and types
- [<b><em>Warp-wide "collective" primitives</em></b>] (https://nvlabs.github.com/cub/group___warp_module.html)
- Warp-wide prefix scan, reduction, etc.
- Safe and architecture-specific
- [<b><em>Thread and resource utilities</em></b>](https://nvlabs.github.com/cub/group___thread_module.html)
- PTX intrinsics, device reflection, texture-caching iterators, caching memory allocators, etc.
![Orientation of collective primitives within the CUDA software stack](http://nvlabs.github.com/cub/cub_overview.png)
<br><hr>
<h3>A Simple Example</h3>
```C++
#include <cub/cub.cuh>
// Block-sorting CUDA kernel
__global__ void BlockSortKernel(int *d_in, int *d_out)
{
using namespace cub;
// Specialize BlockRadixSort, BlockLoad, and BlockStore for 128 threads
// owning 16 integer items each
typedef BlockRadixSort<int, 128, 16> BlockRadixSort;
typedef BlockLoad<int, 128, 16, BLOCK_LOAD_TRANSPOSE> BlockLoad;
typedef BlockStore<int, 128, 16, BLOCK_STORE_TRANSPOSE> BlockStore;
// Allocate shared memory
__shared__ union {
typename BlockRadixSort::TempStorage sort;
typename BlockLoad::TempStorage load;
typename BlockStore::TempStorage store;
} temp_storage;
int block_offset = blockIdx.x * (128 * 16); // OffsetT for this block's ment
// Obtain a segment of 2048 consecutive keys that are blocked across threads
int thread_keys[16];
BlockLoad(temp_storage.load).Load(d_in + block_offset, thread_keys);
__syncthreads();
// Collectively sort the keys
BlockRadixSort(temp_storage.sort).Sort(thread_keys);
__syncthreads();
// Store the sorted segment
BlockStore(temp_storage.store).Store(d_out + block_offset, thread_keys);
}
```
Each thread block uses cub::BlockRadixSort to collectively sort
its own input segment. The class is specialized by the
data type being sorted, by the number of threads per block, by the number of
keys per thread, and implicitly by the targeted compilation architecture.
The cub::BlockLoad and cub::BlockStore classes are similarly specialized.
Furthermore, to provide coalesced accesses to device memory, these primitives are
configured to access memory using a striped access pattern (where consecutive threads
simultaneously access consecutive items) and then <em>transpose</em> the keys into
a [<em>blocked arrangement</em>](index.html#sec4sec3) of elements across threads.
Once specialized, these classes expose opaque \p TempStorage member types.
The thread block uses these storage types to statically allocate the union of
shared memory needed by the thread block. (Alternatively these storage types
could be aliased to global memory allocations).
<br><hr>
<h3>Stable Releases</h3>
CUB releases are labeled using version identifiers having three fields:
*epoch.feature.update*. The *epoch* field corresponds to support for
a major change in the CUDA programming model. The *feature* field
corresponds to a stable set of features, functionality, and interface. The
*update* field corresponds to a bug-fix or performance update for that
feature set. At the moment, we do not publicly provide non-stable releases
such as development snapshots, beta releases or rolling releases. (Feel free
to contact us if you would like such things.) See the
[CUB Project Website](http://nvlabs.github.com/cub) for more information.
<br><hr>
<h3>Contributors</h3>
CUB is developed as an open-source project by [NVIDIA Research](http://research.nvidia.com). The primary contributor is [Duane Merrill](http://github.com/dumerrill).
<br><hr>
<h3>Open Source License</h3>
CUB is available under the "New BSD" open-source license:
```
Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the NVIDIA CORPORATION nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
```
#/******************************************************************************
# * Copyright (c) 2011, Duane Merrill. All rights reserved.
# * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions are met:
# * * Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * * Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in the
# * documentation and/or other materials provided with the distribution.
# * * Neither the name of the NVIDIA CORPORATION nor the
# * names of its contributors may be used to endorse or promote products
# * derived from this software without specific prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *
#******************************************************************************/
#-------------------------------------------------------------------------------
# Commandline Options
#-------------------------------------------------------------------------------
# [sm=<XXX,...>] Compute-capability to compile for, e.g., "sm=200,300,350" (SM20 by default).
COMMA = ,
ifdef sm
SM_ARCH = $(subst $(COMMA),-,$(sm))
else
SM_ARCH = 200
endif
ifeq (700, $(findstring 700, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_70,code=\"sm_70,compute_70\"
SM_DEF += -DSM700
TEST_ARCH = 700
endif
ifeq (620, $(findstring 620, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_62,code=\"sm_62,compute_62\"
SM_DEF += -DSM620
TEST_ARCH = 620
endif
ifeq (610, $(findstring 610, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_61,code=\"sm_61,compute_61\"
SM_DEF += -DSM610
TEST_ARCH = 610
endif
ifeq (600, $(findstring 600, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_60,code=\"sm_60,compute_60\"
SM_DEF += -DSM600
TEST_ARCH = 600
endif
ifeq (520, $(findstring 520, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_52,code=\"sm_52,compute_52\"
SM_DEF += -DSM520
TEST_ARCH = 520
endif
ifeq (370, $(findstring 370, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_37,code=\"sm_37,compute_37\"
SM_DEF += -DSM370
TEST_ARCH = 370
endif
ifeq (350, $(findstring 350, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
SM_DEF += -DSM350
TEST_ARCH = 350
endif
ifeq (300, $(findstring 300, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
SM_DEF += -DSM300
TEST_ARCH = 300
endif
ifeq (210, $(findstring 210, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_20,code=\"sm_21,compute_20\"
SM_DEF += -DSM210
TEST_ARCH = 210
endif
ifeq (200, $(findstring 200, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_20,code=\"sm_20,compute_20\"
SM_DEF += -DSM200
TEST_ARCH = 200
endif
ifeq (130, $(findstring 130, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_13,code=\"sm_13,compute_13\"
SM_DEF += -DSM130
TEST_ARCH = 130
endif
ifeq (120, $(findstring 120, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_12,code=\"sm_12,compute_12\"
SM_DEF += -DSM120
TEST_ARCH = 120
endif
ifeq (110, $(findstring 110, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_11,code=\"sm_11,compute_11\"
SM_DEF += -DSM110
TEST_ARCH = 110
endif
ifeq (100, $(findstring 100, $(SM_ARCH)))
SM_TARGETS += -gencode=arch=compute_10,code=\"sm_10,compute_10\"
SM_DEF += -DSM100
TEST_ARCH = 100
endif
# [cdp=<0|1>] CDP enable option (default: no)
ifeq ($(cdp), 1)
DEFINES += -DCUB_CDP
CDP_SUFFIX = cdp
NVCCFLAGS += -rdc=true -lcudadevrt
else
CDP_SUFFIX = nocdp
endif
# [force32=<0|1>] Device addressing mode option (64-bit device pointers by default)
ifeq ($(force32), 1)
CPU_ARCH = -m32
CPU_ARCH_SUFFIX = i386
else
CPU_ARCH = -m64
CPU_ARCH_SUFFIX = x86_64
NPPI = -lnppist
endif
# [abi=<0|1>] CUDA ABI option (enabled by default)
ifneq ($(abi), 0)
ABI_SUFFIX = abi
else
NVCCFLAGS += -Xptxas -abi=no
ABI_SUFFIX = noabi
endif
# [open64=<0|1>] Middle-end compiler option (nvvm by default)
ifeq ($(open64), 1)
NVCCFLAGS += -open64
PTX_SUFFIX = open64
else
PTX_SUFFIX = nvvm
endif
# [verbose=<0|1>] Verbose toolchain output from nvcc option
ifeq ($(verbose), 1)
NVCCFLAGS += -v
endif
# [keep=<0|1>] Keep intermediate compilation artifacts option
ifeq ($(keep), 1)
NVCCFLAGS += -keep
endif
# [debug=<0|1>] Generate debug mode code
ifeq ($(debug), 1)
NVCCFLAGS += -G
endif
#-------------------------------------------------------------------------------
# Compiler and compilation platform
#-------------------------------------------------------------------------------
CUB_DIR = $(dir $(lastword $(MAKEFILE_LIST)))
NVCC = "$(shell which nvcc)"
ifdef nvccver
NVCC_VERSION = $(nvccver)
else
NVCC_VERSION = $(strip $(shell nvcc --version | grep release | sed 's/.*release //' | sed 's/,.*//'))
endif
# detect OS
OSUPPER = $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:])
# Default flags: verbose kernel properties (regs, smem, cmem, etc.); runtimes for compilation phases
NVCCFLAGS += $(SM_DEF) -Xptxas -v -Xcudafe -\#
ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER)))
# For MSVC
# Enable more warnings and treat as errors
NVCCFLAGS += -Xcompiler /W3 -Xcompiler /WX
# Disable excess x86 floating point precision that can lead to results being labeled incorrectly
NVCCFLAGS += -Xcompiler /fp:strict
# Help the compiler/linker work with huge numbers of kernels on Windows
NVCCFLAGS += -Xcompiler /bigobj -Xcompiler /Zm500
CC = cl
# Multithreaded runtime
NVCCFLAGS += -Xcompiler /MT
ifneq ($(force32), 1)
CUDART_CYG = "$(shell dirname $(NVCC))/../lib/Win32/cudart.lib"
else
CUDART_CYG = "$(shell dirname $(NVCC))/../lib/x64/cudart.lib"
endif
CUDART = "$(shell cygpath -w $(CUDART_CYG))"
else
# For g++
# Disable excess x86 floating point precision that can lead to results being labeled incorrectly
NVCCFLAGS += -Xcompiler -ffloat-store
CC = g++
ifneq ($(force32), 1)
CUDART = "$(shell dirname $(NVCC))/../lib/libcudart_static.a"
else
CUDART = "$(shell dirname $(NVCC))/../lib64/libcudart_static.a"
endif
endif
# Suffix to append to each binary
BIN_SUFFIX = sm$(SM_ARCH)_$(PTX_SUFFIX)_$(NVCC_VERSION)_$(ABI_SUFFIX)_$(CDP_SUFFIX)_$(CPU_ARCH_SUFFIX)
#-------------------------------------------------------------------------------
# Dependency Lists
#-------------------------------------------------------------------------------
rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
CUB_DEPS = $(call rwildcard, $(CUB_DIR),*.cuh) \
$(CUB_DIR)common.mk
此差异已折叠。
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/**
* \file
* AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep .
*/
#pragma once
#include "../thread/thread_reduce.cuh"
#include "../thread/thread_load.cuh"
#include "../warp/warp_reduce.cuh"
#include "../block/block_load.cuh"
#include "../util_type.cuh"
#include "../iterator/cache_modified_input_iterator.cuh"
#include "../util_namespace.cuh"
/// Optional outer namespace(s)
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/******************************************************************************
* Tuning policy types
******************************************************************************/
/**
* Parameterizable tuning policy type for AgentRadixSortUpsweep
*/
template <
int _BLOCK_THREADS, ///< Threads per thread block
int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input)
CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading keys
int _RADIX_BITS> ///< The number of radix bits, i.e., log2(bins)
struct AgentRadixSortUpsweepPolicy
{
enum
{
BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block
ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input)
RADIX_BITS = _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins)
};
static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading keys
};
/******************************************************************************
* Thread block abstractions
******************************************************************************/
/**
* \brief AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep .
*/
template <
typename AgentRadixSortUpsweepPolicy, ///< Parameterized AgentRadixSortUpsweepPolicy tuning policy type
typename KeyT, ///< KeyT type
typename OffsetT> ///< Signed integer type for global offsets
struct AgentRadixSortUpsweep
{
//---------------------------------------------------------------------
// Type definitions and constants
//---------------------------------------------------------------------
typedef typename Traits<KeyT>::UnsignedBits UnsignedBits;
// Integer type for digit counters (to be packed into words of PackedCounters)
typedef unsigned char DigitCounter;
// Integer type for packing DigitCounters into columns of shared memory banks
typedef unsigned int PackedCounter;
static const CacheLoadModifier LOAD_MODIFIER = AgentRadixSortUpsweepPolicy::LOAD_MODIFIER;
enum
{
RADIX_BITS = AgentRadixSortUpsweepPolicy::RADIX_BITS,
BLOCK_THREADS = AgentRadixSortUpsweepPolicy::BLOCK_THREADS,
KEYS_PER_THREAD = AgentRadixSortUpsweepPolicy::ITEMS_PER_THREAD,
RADIX_DIGITS = 1 << RADIX_BITS,
LOG_WARP_THREADS = CUB_PTX_LOG_WARP_THREADS,
WARP_THREADS = 1 << LOG_WARP_THREADS,
WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS,
TILE_ITEMS = BLOCK_THREADS * KEYS_PER_THREAD,
BYTES_PER_COUNTER = sizeof(DigitCounter),
LOG_BYTES_PER_COUNTER = Log2<BYTES_PER_COUNTER>::VALUE,
PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter),
LOG_PACKING_RATIO = Log2<PACKING_RATIO>::VALUE,
LOG_COUNTER_LANES = CUB_MAX(0, RADIX_BITS - LOG_PACKING_RATIO),
COUNTER_LANES = 1 << LOG_COUNTER_LANES,
// To prevent counter overflow, we must periodically unpack and aggregate the
// digit counters back into registers. Each counter lane is assigned to a
// warp for aggregation.
LANES_PER_WARP = CUB_MAX(1, (COUNTER_LANES + WARPS - 1) / WARPS),
// Unroll tiles in batches without risk of counter overflow
UNROLL_COUNT = CUB_MIN(64, 255 / KEYS_PER_THREAD),
UNROLLED_ELEMENTS = UNROLL_COUNT * TILE_ITEMS,
};
// Input iterator wrapper type (for applying cache modifier)s
typedef CacheModifiedInputIterator<LOAD_MODIFIER, UnsignedBits, OffsetT> KeysItr;
/**
* Shared memory storage layout
*/
union __align__(16) _TempStorage
{
DigitCounter thread_counters[COUNTER_LANES][BLOCK_THREADS][PACKING_RATIO];
PackedCounter packed_thread_counters[COUNTER_LANES][BLOCK_THREADS];
OffsetT block_counters[WARP_THREADS][RADIX_DIGITS];
};
/// Alias wrapper allowing storage to be unioned
struct TempStorage : Uninitialized<_TempStorage> {};
//---------------------------------------------------------------------
// Thread fields (aggregate state bundle)
//---------------------------------------------------------------------
// Shared storage for this CTA
_TempStorage &temp_storage;
// Thread-local counters for periodically aggregating composite-counter lanes
OffsetT local_counts[LANES_PER_WARP][PACKING_RATIO];
// Input and output device pointers
KeysItr d_keys_in;
// The least-significant bit position of the current digit to extract
int current_bit;
// Number of bits in current digit
int num_bits;
//---------------------------------------------------------------------
// Helper structure for templated iteration
//---------------------------------------------------------------------
// Iterate
template <int COUNT, int MAX>
struct Iterate
{
// BucketKeys
static __device__ __forceinline__ void BucketKeys(
AgentRadixSortUpsweep &cta,
UnsignedBits keys[KEYS_PER_THREAD])
{
cta.Bucket(keys[COUNT]);
// Next
Iterate<COUNT + 1, MAX>::BucketKeys(cta, keys);
}
};
// Terminate
template <int MAX>
struct Iterate<MAX, MAX>
{
// BucketKeys
static __device__ __forceinline__ void BucketKeys(AgentRadixSortUpsweep &/*cta*/, UnsignedBits /*keys*/[KEYS_PER_THREAD]) {}
};
//---------------------------------------------------------------------
// Utility methods
//---------------------------------------------------------------------
/**
* Decode a key and increment corresponding smem digit counter
*/
__device__ __forceinline__ void Bucket(UnsignedBits key)
{
// Perform transform op
UnsignedBits converted_key = Traits<KeyT>::TwiddleIn(key);
// Extract current digit bits
UnsignedBits digit = BFE(converted_key, current_bit, num_bits);
// Get sub-counter offset
UnsignedBits sub_counter = digit & (PACKING_RATIO - 1);
// Get row offset
UnsignedBits row_offset = digit >> LOG_PACKING_RATIO;
// Increment counter
temp_storage.thread_counters[row_offset][threadIdx.x][sub_counter]++;
}
/**
* Reset composite counters
*/
__device__ __forceinline__ void ResetDigitCounters()
{
#pragma unroll
for (int LANE = 0; LANE < COUNTER_LANES; LANE++)
{
temp_storage.packed_thread_counters[LANE][threadIdx.x] = 0;
}
}
/**
* Reset the unpacked counters in each thread
*/
__device__ __forceinline__ void ResetUnpackedCounters()
{
#pragma unroll
for (int LANE = 0; LANE < LANES_PER_WARP; LANE++)
{
#pragma unroll
for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++)
{
local_counts[LANE][UNPACKED_COUNTER] = 0;
}
}
}
/**
* Extracts and aggregates the digit counters for each counter lane
* owned by this warp
*/
__device__ __forceinline__ void UnpackDigitCounts()
{
unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS;
unsigned int warp_tid = LaneId();
#pragma unroll
for (int LANE = 0; LANE < LANES_PER_WARP; LANE++)
{
const int counter_lane = (LANE * WARPS) + warp_id;
if (counter_lane < COUNTER_LANES)
{
#pragma unroll
for (int PACKED_COUNTER = 0; PACKED_COUNTER < BLOCK_THREADS; PACKED_COUNTER += WARP_THREADS)
{
#pragma unroll
for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++)
{
OffsetT counter = temp_storage.thread_counters[counter_lane][warp_tid + PACKED_COUNTER][UNPACKED_COUNTER];
local_counts[LANE][UNPACKED_COUNTER] += counter;
}
}
}
}
}
/**
* Processes a single, full tile
*/
__device__ __forceinline__ void ProcessFullTile(OffsetT block_offset)
{
// Tile of keys
UnsignedBits keys[KEYS_PER_THREAD];
LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, d_keys_in + block_offset, keys);
// Prevent hoisting
CTA_SYNC();
// Bucket tile of keys
Iterate<0, KEYS_PER_THREAD>::BucketKeys(*this, keys);
}
/**
* Processes a single load (may have some threads masked off)
*/
__device__ __forceinline__ void ProcessPartialTile(
OffsetT block_offset,
const OffsetT &block_end)
{
// Process partial tile if necessary using single loads
block_offset += threadIdx.x;
while (block_offset < block_end)
{
// Load and bucket key
UnsignedBits key = d_keys_in[block_offset];
Bucket(key);
block_offset += BLOCK_THREADS;
}
}
//---------------------------------------------------------------------
// Interface
//---------------------------------------------------------------------
/**
* Constructor
*/
__device__ __forceinline__ AgentRadixSortUpsweep(
TempStorage &temp_storage,
const KeyT *d_keys_in,
int current_bit,
int num_bits)
:
temp_storage(temp_storage.Alias()),
d_keys_in(reinterpret_cast<const UnsignedBits*>(d_keys_in)),
current_bit(current_bit),
num_bits(num_bits)
{}
/**
* Compute radix digit histograms from a segment of input tiles.
*/
__device__ __forceinline__ void ProcessRegion(
OffsetT block_offset,
const OffsetT &block_end)
{
// Reset digit counters in smem and unpacked counters in registers
ResetDigitCounters();
ResetUnpackedCounters();
// Unroll batches of full tiles
while (block_offset + UNROLLED_ELEMENTS <= block_end)
{
for (int i = 0; i < UNROLL_COUNT; ++i)
{
ProcessFullTile(block_offset);
block_offset += TILE_ITEMS;
}
CTA_SYNC();
// Aggregate back into local_count registers to prevent overflow
UnpackDigitCounts();
CTA_SYNC();
// Reset composite counters in lanes
ResetDigitCounters();
}
// Unroll single full tiles
while (block_offset + TILE_ITEMS <= block_end)
{
ProcessFullTile(block_offset);
block_offset += TILE_ITEMS;
}
// Process partial tile if necessary
ProcessPartialTile(
block_offset,
block_end);
CTA_SYNC();
// Aggregate back into local_count registers
UnpackDigitCounts();
}
/**
* Extract counts (saving them to the external array)
*/
template <bool IS_DESCENDING>
__device__ __forceinline__ void ExtractCounts(
OffsetT *counters,
int bin_stride = 1,
int bin_offset = 0)
{
unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS;
unsigned int warp_tid = LaneId();
// Place unpacked digit counters in shared memory
#pragma unroll
for (int LANE = 0; LANE < LANES_PER_WARP; LANE++)
{
int counter_lane = (LANE * WARPS) + warp_id;
if (counter_lane < COUNTER_LANES)
{
int digit_row = counter_lane << LOG_PACKING_RATIO;
#pragma unroll
for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++)
{
int bin_idx = digit_row + UNPACKED_COUNTER;
temp_storage.block_counters[warp_tid][bin_idx] =
local_counts[LANE][UNPACKED_COUNTER];
}
}
}
CTA_SYNC();
// Rake-reduce bin_count reductions
// Whole blocks
#pragma unroll
for (int BIN_BASE = RADIX_DIGITS % BLOCK_THREADS;
(BIN_BASE + BLOCK_THREADS) <= RADIX_DIGITS;
BIN_BASE += BLOCK_THREADS)
{
int bin_idx = BIN_BASE + threadIdx.x;
OffsetT bin_count = 0;
#pragma unroll
for (int i = 0; i < WARP_THREADS; ++i)
bin_count += temp_storage.block_counters[i][bin_idx];
if (IS_DESCENDING)
bin_idx = RADIX_DIGITS - bin_idx - 1;
counters[(bin_stride * bin_idx) + bin_offset] = bin_count;
}
// Remainder
if ((RADIX_DIGITS % BLOCK_THREADS != 0) && (threadIdx.x < RADIX_DIGITS))
{
int bin_idx = threadIdx.x;
OffsetT bin_count = 0;
#pragma unroll
for (int i = 0; i < WARP_THREADS; ++i)
bin_count += temp_storage.block_counters[i][bin_idx];
if (IS_DESCENDING)
bin_idx = RADIX_DIGITS - bin_idx - 1;
counters[(bin_stride * bin_idx) + bin_offset] = bin_count;
}
}
/**
* Extract counts
*/
template <int BINS_TRACKED_PER_THREAD>
__device__ __forceinline__ void ExtractCounts(
OffsetT (&bin_count)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1]
{
unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS;
unsigned int warp_tid = LaneId();
// Place unpacked digit counters in shared memory
#pragma unroll
for (int LANE = 0; LANE < LANES_PER_WARP; LANE++)
{
int counter_lane = (LANE * WARPS) + warp_id;
if (counter_lane < COUNTER_LANES)
{
int digit_row = counter_lane << LOG_PACKING_RATIO;
#pragma unroll
for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++)
{
int bin_idx = digit_row + UNPACKED_COUNTER;
temp_storage.block_counters[warp_tid][bin_idx] =
local_counts[LANE][UNPACKED_COUNTER];
}
}
}
CTA_SYNC();
// Rake-reduce bin_count reductions
#pragma unroll
for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track)
{
int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track;
if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS))
{
bin_count[track] = 0;
#pragma unroll
for (int i = 0; i < WARP_THREADS; ++i)
bin_count[track] += temp_storage.block_counters[i][bin_idx];
}
}
}
};
} // CUB namespace
CUB_NS_POSTFIX // Optional outer namespace(s)
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/**
* \file
* cub::AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction .
*/
#pragma once
#include <iterator>
#include "../block/block_load.cuh"
#include "../block/block_reduce.cuh"
#include "../grid/grid_mapping.cuh"
#include "../grid/grid_even_share.cuh"
#include "../util_type.cuh"
#include "../iterator/cache_modified_input_iterator.cuh"
#include "../util_namespace.cuh"
/// Optional outer namespace(s)
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/******************************************************************************
* Tuning policy types
******************************************************************************/
/**
* Parameterizable tuning policy type for AgentReduce
*/
template <
int _BLOCK_THREADS, ///< Threads per thread block
int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input)
int _VECTOR_LOAD_LENGTH, ///< Number of items per vectorized load
BlockReduceAlgorithm _BLOCK_ALGORITHM, ///< Cooperative block-wide reduction algorithm to use
CacheLoadModifier _LOAD_MODIFIER> ///< Cache load modifier for reading input elements
struct AgentReducePolicy
{
enum
{
BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block
ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input)
VECTOR_LOAD_LENGTH = _VECTOR_LOAD_LENGTH, ///< Number of items per vectorized load
};
static const BlockReduceAlgorithm BLOCK_ALGORITHM = _BLOCK_ALGORITHM; ///< Cooperative block-wide reduction algorithm to use
static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements
};
/******************************************************************************
* Thread block abstractions
******************************************************************************/
/**
* \brief AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction .
*
* Each thread reduces only the values it loads. If \p FIRST_TILE, this
* partial reduction is stored into \p thread_aggregate. Otherwise it is
* accumulated into \p thread_aggregate.
*/
template <
typename AgentReducePolicy, ///< Parameterized AgentReducePolicy tuning policy type
typename InputIteratorT, ///< Random-access iterator type for input
typename OutputIteratorT, ///< Random-access iterator type for output
typename OffsetT, ///< Signed integer type for global offsets
typename ReductionOp> ///< Binary reduction operator type having member <tt>T operator()(const T &a, const T &b)</tt>
struct AgentReduce
{
//---------------------------------------------------------------------
// Types and constants
//---------------------------------------------------------------------
/// The input value type
typedef typename std::iterator_traits<InputIteratorT>::value_type InputT;
/// The output value type
typedef typename If<(Equals<typename std::iterator_traits<OutputIteratorT>::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ?
typename std::iterator_traits<InputIteratorT>::value_type, // ... then the input iterator's value type,
typename std::iterator_traits<OutputIteratorT>::value_type>::Type OutputT; // ... else the output iterator's value type
/// Vector type of InputT for data movement
typedef typename CubVector<InputT, AgentReducePolicy::VECTOR_LOAD_LENGTH>::Type VectorT;
/// Input iterator wrapper type (for applying cache modifier)
typedef typename If<IsPointer<InputIteratorT>::VALUE,
CacheModifiedInputIterator<AgentReducePolicy::LOAD_MODIFIER, InputT, OffsetT>, // Wrap the native input pointer with CacheModifiedInputIterator
InputIteratorT>::Type // Directly use the supplied input iterator type
WrappedInputIteratorT;
/// Constants
enum
{
BLOCK_THREADS = AgentReducePolicy::BLOCK_THREADS,
ITEMS_PER_THREAD = AgentReducePolicy::ITEMS_PER_THREAD,
VECTOR_LOAD_LENGTH = CUB_MIN(ITEMS_PER_THREAD, AgentReducePolicy::VECTOR_LOAD_LENGTH),
TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD,
// Can vectorize according to the policy if the input iterator is a native pointer to a primitive type
ATTEMPT_VECTORIZATION = (VECTOR_LOAD_LENGTH > 1) &&
(ITEMS_PER_THREAD % VECTOR_LOAD_LENGTH == 0) &&
(IsPointer<InputIteratorT>::VALUE) && Traits<InputT>::PRIMITIVE,
};
static const CacheLoadModifier LOAD_MODIFIER = AgentReducePolicy::LOAD_MODIFIER;
static const BlockReduceAlgorithm BLOCK_ALGORITHM = AgentReducePolicy::BLOCK_ALGORITHM;
/// Parameterized BlockReduce primitive
typedef BlockReduce<OutputT, BLOCK_THREADS, AgentReducePolicy::BLOCK_ALGORITHM> BlockReduceT;
/// Shared memory type required by this thread block
struct _TempStorage
{
typename BlockReduceT::TempStorage reduce;
};
/// Alias wrapper allowing storage to be unioned
struct TempStorage : Uninitialized<_TempStorage> {};
//---------------------------------------------------------------------
// Per-thread fields
//---------------------------------------------------------------------
_TempStorage& temp_storage; ///< Reference to temp_storage
InputIteratorT d_in; ///< Input data to reduce
WrappedInputIteratorT d_wrapped_in; ///< Wrapped input data to reduce
ReductionOp reduction_op; ///< Binary reduction operator
//---------------------------------------------------------------------
// Utility
//---------------------------------------------------------------------
// Whether or not the input is aligned with the vector type (specialized for types we can vectorize)
template <typename Iterator>
static __device__ __forceinline__ bool IsAligned(
Iterator d_in,
Int2Type<true> /*can_vectorize*/)
{
return (size_t(d_in) & (sizeof(VectorT) - 1)) == 0;
}
// Whether or not the input is aligned with the vector type (specialized for types we cannot vectorize)
template <typename Iterator>
static __device__ __forceinline__ bool IsAligned(
Iterator /*d_in*/,
Int2Type<false> /*can_vectorize*/)
{
return false;
}
//---------------------------------------------------------------------
// Constructor
//---------------------------------------------------------------------
/**
* Constructor
*/
__device__ __forceinline__ AgentReduce(
TempStorage& temp_storage, ///< Reference to temp_storage
InputIteratorT d_in, ///< Input data to reduce
ReductionOp reduction_op) ///< Binary reduction operator
:
temp_storage(temp_storage.Alias()),
d_in(d_in),
d_wrapped_in(d_in),
reduction_op(reduction_op)
{}
//---------------------------------------------------------------------
// Tile consumption
//---------------------------------------------------------------------
/**
* Consume a full tile of input (non-vectorized)
*/
template <int IS_FIRST_TILE>
__device__ __forceinline__ void ConsumeTile(
OutputT &thread_aggregate,
OffsetT block_offset, ///< The offset the tile to consume
int /*valid_items*/, ///< The number of valid items in the tile
Int2Type<true> /*is_full_tile*/, ///< Whether or not this is a full tile
Int2Type<false> /*can_vectorize*/) ///< Whether or not we can vectorize loads
{
OutputT items[ITEMS_PER_THREAD];
// Load items in striped fashion
LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, d_wrapped_in + block_offset, items);
// Reduce items within each thread stripe
thread_aggregate = (IS_FIRST_TILE) ?
internal::ThreadReduce(items, reduction_op) :
internal::ThreadReduce(items, reduction_op, thread_aggregate);
}
/**
* Consume a full tile of input (vectorized)
*/
template <int IS_FIRST_TILE>
__device__ __forceinline__ void ConsumeTile(
OutputT &thread_aggregate,
OffsetT block_offset, ///< The offset the tile to consume
int /*valid_items*/, ///< The number of valid items in the tile
Int2Type<true> /*is_full_tile*/, ///< Whether or not this is a full tile
Int2Type<true> /*can_vectorize*/) ///< Whether or not we can vectorize loads
{
// Alias items as an array of VectorT and load it in striped fashion
enum { WORDS = ITEMS_PER_THREAD / VECTOR_LOAD_LENGTH };
// Fabricate a vectorized input iterator
InputT *d_in_unqualified = const_cast<InputT*>(d_in) + block_offset + (threadIdx.x * VECTOR_LOAD_LENGTH);
CacheModifiedInputIterator<AgentReducePolicy::LOAD_MODIFIER, VectorT, OffsetT> d_vec_in(
reinterpret_cast<VectorT*>(d_in_unqualified));
// Load items as vector items
InputT input_items[ITEMS_PER_THREAD];
VectorT *vec_items = reinterpret_cast<VectorT*>(input_items);
#pragma unroll
for (int i = 0; i < WORDS; ++i)
vec_items[i] = d_vec_in[BLOCK_THREADS * i];
// Convert from input type to output type
OutputT items[ITEMS_PER_THREAD];
#pragma unroll
for (int i = 0; i < ITEMS_PER_THREAD; ++i)
items[i] = input_items[i];
// Reduce items within each thread stripe
thread_aggregate = (IS_FIRST_TILE) ?
internal::ThreadReduce(items, reduction_op) :
internal::ThreadReduce(items, reduction_op, thread_aggregate);
}
/**
* Consume a partial tile of input
*/
template <int IS_FIRST_TILE, int CAN_VECTORIZE>
__device__ __forceinline__ void ConsumeTile(
OutputT &thread_aggregate,
OffsetT block_offset, ///< The offset the tile to consume
int valid_items, ///< The number of valid items in the tile
Int2Type<false> /*is_full_tile*/, ///< Whether or not this is a full tile
Int2Type<CAN_VECTORIZE> /*can_vectorize*/) ///< Whether or not we can vectorize loads
{
// Partial tile
int thread_offset = threadIdx.x;
// Read first item
if ((IS_FIRST_TILE) && (thread_offset < valid_items))
{
thread_aggregate = d_wrapped_in[block_offset + thread_offset];
thread_offset += BLOCK_THREADS;
}
// Continue reading items (block-striped)
while (thread_offset < valid_items)
{
OutputT item = d_wrapped_in[block_offset + thread_offset];
thread_aggregate = reduction_op(thread_aggregate, item);
thread_offset += BLOCK_THREADS;
}
}
//---------------------------------------------------------------
// Consume a contiguous segment of tiles
//---------------------------------------------------------------------
/**
* \brief Reduce a contiguous segment of input tiles
*/
template <int CAN_VECTORIZE>
__device__ __forceinline__ OutputT ConsumeRange(
GridEvenShare<OffsetT> &even_share, ///< GridEvenShare descriptor
Int2Type<CAN_VECTORIZE> can_vectorize) ///< Whether or not we can vectorize loads
{
OutputT thread_aggregate;
if (even_share.block_offset + TILE_ITEMS > even_share.block_end)
{
// First tile isn't full (not all threads have valid items)
int valid_items = even_share.block_end - even_share.block_offset;
ConsumeTile<true>(thread_aggregate, even_share.block_offset, valid_items, Int2Type<false>(), can_vectorize);
return BlockReduceT(temp_storage.reduce).Reduce(thread_aggregate, reduction_op, valid_items);
}
// At least one full block
ConsumeTile<true>(thread_aggregate, even_share.block_offset, TILE_ITEMS, Int2Type<true>(), can_vectorize);
even_share.block_offset += even_share.block_stride;
// Consume subsequent full tiles of input
while (even_share.block_offset + TILE_ITEMS <= even_share.block_end)
{
ConsumeTile<false>(thread_aggregate, even_share.block_offset, TILE_ITEMS, Int2Type<true>(), can_vectorize);
even_share.block_offset += even_share.block_stride;
}
// Consume a partially-full tile
if (even_share.block_offset < even_share.block_end)
{
int valid_items = even_share.block_end - even_share.block_offset;
ConsumeTile<false>(thread_aggregate, even_share.block_offset, valid_items, Int2Type<false>(), can_vectorize);
}
// Compute block-wide reduction (all threads have valid items)
return BlockReduceT(temp_storage.reduce).Reduce(thread_aggregate, reduction_op);
}
/**
* \brief Reduce a contiguous segment of input tiles
*/
__device__ __forceinline__ OutputT ConsumeRange(
OffsetT block_offset, ///< [in] Threadblock begin offset (inclusive)
OffsetT block_end) ///< [in] Threadblock end offset (exclusive)
{
GridEvenShare<OffsetT> even_share;
even_share.template BlockInit<TILE_ITEMS>(block_offset, block_end);
return (IsAligned(d_in + block_offset, Int2Type<ATTEMPT_VECTORIZATION>())) ?
ConsumeRange(even_share, Int2Type<true && ATTEMPT_VECTORIZATION>()) :
ConsumeRange(even_share, Int2Type<false && ATTEMPT_VECTORIZATION>());
}
/**
* Reduce a contiguous segment of input tiles
*/
__device__ __forceinline__ OutputT ConsumeTiles(
GridEvenShare<OffsetT> &even_share) ///< [in] GridEvenShare descriptor
{
// Initialize GRID_MAPPING_STRIP_MINE even-share descriptor for this thread block
even_share.template BlockInit<TILE_ITEMS, GRID_MAPPING_STRIP_MINE>();
return (IsAligned(d_in, Int2Type<ATTEMPT_VECTORIZATION>())) ?
ConsumeRange(even_share, Int2Type<true && ATTEMPT_VECTORIZATION>()) :
ConsumeRange(even_share, Int2Type<false && ATTEMPT_VECTORIZATION>());
}
};
} // CUB namespace
CUB_NS_POSTFIX // Optional outer namespace(s)
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/**
* \file
* cub::BlockRakingLayout provides a conflict-free shared memory layout abstraction for warp-raking across thread block data.
*/
#pragma once
#include "../util_macro.cuh"
#include "../util_arch.cuh"
#include "../util_type.cuh"
#include "../util_namespace.cuh"
/// Optional outer namespace(s)
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/**
* \brief BlockRakingLayout provides a conflict-free shared memory layout abstraction for 1D raking across thread block data. ![](raking.png)
* \ingroup BlockModule
*
* \par Overview
* This type facilitates a shared memory usage pattern where a block of CUDA
* threads places elements into shared memory and then reduces the active
* parallelism to one "raking" warp of threads for serially aggregating consecutive
* sequences of shared items. Padding is inserted to eliminate bank conflicts
* (for most data types).
*
* \tparam T The data type to be exchanged.
* \tparam BLOCK_THREADS The thread block size in threads.
* \tparam PTX_ARCH <b>[optional]</b> \ptxversion
*/
template <
typename T,
int BLOCK_THREADS,
int PTX_ARCH = CUB_PTX_ARCH>
struct BlockRakingLayout
{
//---------------------------------------------------------------------
// Constants and type definitions
//---------------------------------------------------------------------
enum
{
/// The total number of elements that need to be cooperatively reduced
SHARED_ELEMENTS = BLOCK_THREADS,
/// Maximum number of warp-synchronous raking threads
MAX_RAKING_THREADS = CUB_MIN(BLOCK_THREADS, CUB_WARP_THREADS(PTX_ARCH)),
/// Number of raking elements per warp-synchronous raking thread (rounded up)
SEGMENT_LENGTH = (SHARED_ELEMENTS + MAX_RAKING_THREADS - 1) / MAX_RAKING_THREADS,
/// Never use a raking thread that will have no valid data (e.g., when BLOCK_THREADS is 62 and SEGMENT_LENGTH is 2, we should only use 31 raking threads)
RAKING_THREADS = (SHARED_ELEMENTS + SEGMENT_LENGTH - 1) / SEGMENT_LENGTH,
/// Whether we will have bank conflicts (technically we should find out if the GCD is > 1)
HAS_CONFLICTS = (CUB_SMEM_BANKS(PTX_ARCH) % SEGMENT_LENGTH == 0),
/// Degree of bank conflicts (e.g., 4-way)
CONFLICT_DEGREE = (HAS_CONFLICTS) ?
(MAX_RAKING_THREADS * SEGMENT_LENGTH) / CUB_SMEM_BANKS(PTX_ARCH) :
1,
/// Pad each segment length with one element if segment length is not relatively prime to warp size and can't be optimized as a vector load
USE_SEGMENT_PADDING = ((SEGMENT_LENGTH & 1) == 0) && (SEGMENT_LENGTH > 2),
/// Total number of elements in the raking grid
GRID_ELEMENTS = RAKING_THREADS * (SEGMENT_LENGTH + USE_SEGMENT_PADDING),
/// Whether or not we need bounds checking during raking (the number of reduction elements is not a multiple of the number of raking threads)
UNGUARDED = (SHARED_ELEMENTS % RAKING_THREADS == 0),
};
/**
* \brief Shared memory storage type
*/
struct __align__(16) _TempStorage
{
T buff[BlockRakingLayout::GRID_ELEMENTS];
};
/// Alias wrapper allowing storage to be unioned
struct TempStorage : Uninitialized<_TempStorage> {};
/**
* \brief Returns the location for the calling thread to place data into the grid
*/
static __device__ __forceinline__ T* PlacementPtr(
TempStorage &temp_storage,
unsigned int linear_tid)
{
// Offset for partial
unsigned int offset = linear_tid;
// Add in one padding element for every segment
if (USE_SEGMENT_PADDING > 0)
{
offset += offset / SEGMENT_LENGTH;
}
// Incorporating a block of padding partials every shared memory segment
return temp_storage.Alias().buff + offset;
}
/**
* \brief Returns the location for the calling thread to begin sequential raking
*/
static __device__ __forceinline__ T* RakingPtr(
TempStorage &temp_storage,
unsigned int linear_tid)
{
return temp_storage.Alias().buff + (linear_tid * (SEGMENT_LENGTH + USE_SEGMENT_PADDING));
}
};
} // CUB namespace
CUB_NS_POSTFIX // Optional outer namespace(s)
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/**
* \file
* Place-holder for prefixing the cub namespace
*/
#pragma once
// For example:
//#define CUB_NS_PREFIX namespace thrust{ namespace detail {
//#define CUB_NS_POSTFIX } }
#ifndef CUB_NS_PREFIX
#define CUB_NS_PREFIX
#endif
#ifndef CUB_NS_POSTFIX
#define CUB_NS_POSTFIX
#endif
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
/bin
/Debug
/Release
/cuda55.sdf
/cuda55.suo
/cuda60.sdf
/cuda60.suo
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册