The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is in read-only mode from October 28 to November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here.

Market Basket Analysis

MohamadJavadMohamadJavad Member Posts: 1 Learner I
edited September 2019 in Help
I want to use market basket analysis on the data that I have (see the attached file). Which columns should I select,
and which operator should I use? Could you guide me step by step through creating the association rules, so that I can find correlations between products that are bought together?
Many thanks.


  • yyhuangyyhuang Administrator, Employee-RapidMiner, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 364 RM Data Scientist
    Hi @MohamadJavad,

    Thanks for sharing the transaction data set. The improved FP-Growth operator for market basket (MB) analysis can take various input formats. Please check out the tutorial process in the help docs for a detailed explanation of the acceptable formats.

    See the second tutorial for examples. As discussed in detail in the description, this Operator supports several different formats for the input data.

    • item list in a column: All the items belonging to a transaction appear in a single column, separated by item separators, in a CSV-like format.
    • items in separate columns: All the items belonging to a transaction appear in separate columns, with the first item name appearing in the first column, the second item name in the second column, etc.
    • items in dummy coded columns: Every item in the set of all items has its own column, and the item name is the column name. For each transaction, the binominal values (true/false) indicate whether the item can be found in the basket.
    Based on the columns given in the sample set, I concatenated both the purchased item IDs and the item descriptions in my process. You can use either one.
    Since the sample data was collected on the same day, I assume each line in the raw data lists one item purchased by one customer, so I aggregated the transactions by customer ID.

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      RapidMiner process for market basket analysis.
      Pipeline (see the <connect> elements at the end):
        Retrieve -> Numerical to Polynominal -> Aggregate -> Select Attributes
                 -> Set Role -> FP-Growth -> Create Association Rules
      Results: port "result 1" receives the FP-Growth example set,
               port "result 2" receives the generated association rules.
    -->
    <process version="9.4.000-BETA2">
      <operator activated="true" class="process" compatibility="9.4.000-BETA2" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="1"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Step 1: load the transaction data from the repository.
               NOTE(review): the repository path is specific to the original author's
               machine; point it at your own copy of the data before running. -->
          <operator activated="true" class="retrieve" compatibility="9.4.000-BETA2" expanded="true" height="68" name="Retrieve تراکنش-20" width="90" x="313" y="34">
            <parameter key="repository_entry" value="//RM YY Loal Repository/from Community/data_tmp/تراکنش-20"/>
          </operator>
          <!-- Step 2: type conversion applied with attribute_filter_type="all".
               Presumably this turns numeric IDs into nominal values so they can be
               concatenated and used as item names; confirm against the operator docs. -->
          <operator activated="true" class="numerical_to_polynominal" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="numeric"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="real"/>
            <parameter key="block_type" value="value_series"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_series_end"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- Step 3: build one row per basket. Rows are grouped by "dt" and
               "CustomerID"; ItemID and ItemName are each aggregated with the
               "concatenation" function. -->
          <operator activated="true" class="aggregate" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Aggregate" width="90" x="581" y="34">
            <parameter key="use_default_aggregation" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="default_aggregation_function" value="average"/>
            <list key="aggregation_attributes">
              <parameter key="ItemID" value="concatenation"/>
              <parameter key="ItemName" value="concatenation"/>
            </list>
            <parameter key="group_by_attributes" value="dt|CustomerID"/>
            <parameter key="count_all_combinations" value="false"/>
            <parameter key="only_distinct" value="false"/>
            <parameter key="ignore_missings" value="true"/>
          </operator>
          <!-- Step 4: keep only CustomerID and the concatenated item names.
               To mine on item IDs instead, select the concatenated ItemID column
               (presumably named "concat(ItemID)"; verify in the Aggregate output). -->
          <operator activated="true" class="select_attributes" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Select Attributes" width="90" x="715" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value="CustomerID|concat(ItemName)"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- Step 5: give CustomerID the "id" role, so that the item list is the
               only regular attribute left (presumably so FP-Growth does not treat
               the customer ID as an item; confirm). -->
          <operator activated="true" class="set_role" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Set Role" width="90" x="849" y="34">
            <parameter key="attribute_name" value="CustomerID"/>
            <parameter key="target_role" value="id"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Step 6: mine frequent item sets. The input format is
               "item list in a column" with "|" as item separator. The threshold is
               selected by min_requirement="frequency" with min_frequency="5";
               min_support is also set, but presumably is not the active threshold
               in this mode (verify against the FP-Growth documentation). -->
          <operator activated="true" class="concurrency:fp_growth" compatibility="9.4.000-BETA2" expanded="true" height="82" name="FP-Growth" width="90" x="983" y="34">
            <parameter key="input_format" value="item list in a column"/>
            <parameter key="item_separators" value="|"/>
            <parameter key="use_quotes" value="false"/>
            <parameter key="quotes_character" value="&quot;"/>
            <parameter key="escape_character" value="\"/>
            <parameter key="trim_item_names" value="true"/>
            <parameter key="min_requirement" value="frequency"/>
            <parameter key="min_support" value="0.95"/>
            <parameter key="min_frequency" value="5"/>
            <parameter key="min_items_per_itemset" value="1"/>
            <parameter key="max_items_per_itemset" value="0"/>
            <parameter key="max_number_of_itemsets" value="1000000"/>
            <parameter key="find_min_number_of_itemsets" value="true"/>
            <parameter key="min_number_of_itemsets" value="100"/>
            <parameter key="max_number_of_retries" value="15"/>
            <parameter key="requirement_decrease_factor" value="0.9"/>
            <enumeration key="must_contain_list"/>
          </operator>
          <!-- Step 7: derive association rules from the frequent item sets, using
               the "confidence" criterion with min_confidence="0.8". -->
          <operator activated="true" class="create_association_rules" compatibility="9.4.000-BETA2" expanded="true" height="82" name="Create Association Rules (2)" width="90" x="1184" y="85">
            <parameter key="criterion" value="confidence"/>
            <parameter key="min_confidence" value="0.8"/>
            <parameter key="min_criterion_value" value="0.8"/>
            <parameter key="gain_theta" value="2.0"/>
            <parameter key="laplace_k" value="1.0"/>
          </operator>
          <!-- Wiring between the operators and the process result ports. -->
          <connect from_op="Retrieve تراکنش-20" from_port="output" to_op="Numerical to Polynominal" to_port="example set input"/>
          <connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="FP-Growth" to_port="example set"/>
          <connect from_op="FP-Growth" from_port="example set" to_port="result 1"/>
          <connect from_op="FP-Growth" from_port="frequent sets" to_op="Create Association Rules (2)" to_port="item sets"/>
          <connect from_op="Create Association Rules (2)" from_port="rules" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>


Sign In or Register to comment.