The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
Market Basket Analysis
Learner I
I want to use market basket analysis on data that I have (attached file). Which columns should I select,
and which operator should I use? Could you guide me step by step through creating the association rules, so that I can find correlations between products that are bought together?
many thanks
and which operator should I select? could you guide me step by step to figure out the association rule so that i could find correlation between products which are bought together.
many thanks
Tagged:
0
Answers
Thanks for sharing the transaction data set. The improved FP-Growth operator for MB analysis can take various input formats. Please check out the tutorial process from the help docs for the detailed explanation of the acceptable formats
https://docs.rapidminer.com/latest/studio/operators/modeling/associations/fp_growth.html
See the second tutorial for examples. As discussed in detail in the description, this Operator supports several different formats for the input data.
Since the sample data was collected on the same day, I assume each line in the raw data lists one item purchased by one customer, so I aggregated the transactions by customer ID.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process for market basket analysis.
  Pipeline, in connection order:
    Retrieve, Numerical to Polynominal, Aggregate, Select Attributes,
    Set Role, FP-Growth, Create Association Rules (2).
  Results delivered: the FP-Growth example set (result 1) and the
  association rules (result 2).
-->
<process version="9.4.000-BETA2">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.4.000-BETA2" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value="yhuang@rapidminer.com"/>
    <parameter key="process_duration_for_mail" value="1"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <!-- Load the transaction data from the repository entry below. -->
      <operator activated="true" class="retrieve" compatibility="9.4.000-BETA2" expanded="true" height="68" name="Retrieve تراکنش-20" width="90" x="313" y="34">
        <parameter key="repository_entry" value="//RM YY Loal Repository/from Community/data_tmp/تراکنش-20"/>
      </operator>
      <!-- Numerical to Polynominal, applied with attribute_filter_type "all". -->
      <operator activated="true" class="numerical_to_polynominal" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="447" y="34">
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="numeric"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="real"/>
        <parameter key="block_type" value="value_series"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_series_end"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!--
        Group rows by dt and CustomerID and concatenate ItemID and ItemName
        within each group, so that each group becomes one row carrying its
        item list.
      -->
      <operator activated="true" class="aggregate" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Aggregate" width="90" x="581" y="34">
        <parameter key="use_default_aggregation" value="false"/>
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="default_aggregation_function" value="average"/>
        <list key="aggregation_attributes">
          <parameter key="ItemID" value="concatenation"/>
          <parameter key="ItemName" value="concatenation"/>
        </list>
        <parameter key="group_by_attributes" value="dt|CustomerID"/>
        <parameter key="count_all_combinations" value="false"/>
        <parameter key="only_distinct" value="false"/>
        <parameter key="ignore_missings" value="true"/>
      </operator>
      <!-- Keep only CustomerID and the concatenated item names. -->
      <operator activated="true" class="select_attributes" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Select Attributes" width="90" x="715" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="CustomerID|concat(ItemName)"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Give CustomerID the "id" role. -->
      <operator activated="true" class="set_role" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Set Role" width="90" x="849" y="34">
        <parameter key="attribute_name" value="CustomerID"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <!--
        FP-Growth reads the items from a single column ("item list in a column"),
        split on "|". The minimum requirement is set to frequency (min_frequency 5).
        NOTE(review): min_support is presumably not used while min_requirement is
        "frequency"; confirm against the operator documentation.
        quotes_character is the double-quote character. It is written as the
        predefined entity because a bare double quote inside a double-quoted
        attribute value is not well-formed XML.
      -->
      <operator activated="true" class="concurrency:fp_growth" compatibility="9.4.000-BETA2" expanded="true" height="82" name="FP-Growth" width="90" x="983" y="34">
        <parameter key="input_format" value="item list in a column"/>
        <parameter key="item_separators" value="|"/>
        <parameter key="use_quotes" value="false"/>
        <parameter key="quotes_character" value="&quot;"/>
        <parameter key="escape_character" value="\"/>
        <parameter key="trim_item_names" value="true"/>
        <parameter key="min_requirement" value="frequency"/>
        <parameter key="min_support" value="0.95"/>
        <parameter key="min_frequency" value="5"/>
        <parameter key="min_items_per_itemset" value="1"/>
        <parameter key="max_items_per_itemset" value="0"/>
        <parameter key="max_number_of_itemsets" value="1000000"/>
        <parameter key="find_min_number_of_itemsets" value="true"/>
        <parameter key="min_number_of_itemsets" value="100"/>
        <parameter key="max_number_of_retries" value="15"/>
        <parameter key="requirement_decrease_factor" value="0.9"/>
        <enumeration key="must_contain_list"/>
      </operator>
      <!-- Build association rules from the frequent item sets; criterion is confidence with a minimum of 0.8. -->
      <operator activated="true" class="create_association_rules" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Create Association Rules (2)" width="90" x="1184" y="85">
        <parameter key="criterion" value="confidence"/>
        <parameter key="min_confidence" value="0.8"/>
        <parameter key="min_criterion_value" value="0.8"/>
        <parameter key="gain_theta" value="2.0"/>
        <parameter key="laplace_k" value="1.0"/>
      </operator>
      <!-- Wiring between the operators and to the process result ports. -->
      <connect from_op="Retrieve تراکنش-20" from_port="output" to_op="Numerical to Polynominal" to_port="example set input"/>
      <connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="FP-Growth" to_port="example set"/>
      <connect from_op="FP-Growth" from_port="example set" to_port="result 1"/>
      <connect from_op="FP-Growth" from_port="frequent sets" to_op="Create Association Rules (2)" to_port="item sets"/>
      <connect from_op="Create Association Rules (2)" from_port="rules" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
YY