GSP Operator: Wrong connection when passing sequential patterns to Create Association Rules
Hello,
While the GSP operator works fine, I cannot pass its patterns to the Create Association Rules operator. I get an error message for the following process:
<?xml version="1.0" encoding="UTF-8"?><process version="7.2.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.2.001" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="7.2.001" expanded="true" height="82" name="Data Preparation" width="90" x="179" y="34">
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="7.2.001" expanded="true" height="82" name="Pivot data" width="90" x="45" y="34">
<process expanded="true">
<operator activated="false" class="jdbc_connectors:read_database" compatibility="7.2.001" expanded="true" height="68" name="Read Database" width="90" x="246" y="646">
  <!-- Deactivated reader (activated="false"). The double quotes and the less-than sign in the query are entity-escaped so the attribute value is well-formed XML. -->
  <parameter key="connection" value="DB_NAME"/>
  <parameter key="query" value="SELECT * FROM &quot;ELAT&quot;.&quot;TBLBIT&quot; WHERE ROWNUM &lt; 1000"/>
  <enumeration key="parameters"/>
</operator>
<operator activated="false" class="jdbc_connectors:stream_database" compatibility="7.2.001" expanded="true" height="68" name="Stream Database" width="90" x="380" y="595">
<parameter key="connection" value="DB_NAME"/>
<parameter key="table_name" value="ZZ_RM_ASS"/>
<parameter key="recreate_index" value="true"/>
</operator>
<operator activated="true" class="jdbc_connectors:read_database" compatibility="7.2.001" expanded="true" height="68" name="Read Database (2)" width="90" x="45" y="34">
  <!-- Active reader whose output feeds Select Attributes (1). The double quotes and the less-than sign in the query are entity-escaped so the attribute value is well-formed XML. -->
  <parameter key="connection" value="DB_NAME"/>
  <parameter key="query" value="SELECT * FROM &quot;ELATAPEX&quot;.&quot;ZZ_RM_ASS&quot; WHERE ROWNUM &lt; 10000 "/>
  <enumeration key="parameters"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.2.001" expanded="true" height="82" name="Select Attributes (1)" width="90" x="179" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="BITID|TBLUNIQUELRU_ID|TIMESTAMP|EVENT"/>
</operator>
<operator activated="true" class="generate_concatenation" compatibility="7.2.001" expanded="true" height="82" name="Generate Concatenation" width="90" x="179" y="136">
<parameter key="first_attribute" value="TBLUNIQUELRU_ID"/>
<parameter key="second_attribute" value="TIMESTAMP"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.2.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="179" y="238">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="BITID|EVENT|TBLUNIQUELRU_ID_TIMESTAMP"/>
</operator>
<operator activated="true" class="pivot" compatibility="7.2.001" expanded="true" height="82" name="Pivot" width="90" x="380" y="34">
<parameter key="group_attribute" value="TBLUNIQUELRU_ID_TIMESTAMP"/>
<parameter key="index_attribute" value="BITID"/>
<parameter key="consider_weights" value="false"/>
<parameter key="skip_constant_attributes" value="false"/>
<parameter key="datamanagement" value="float_array"/>
</operator>
<operator activated="true" class="split" compatibility="7.2.001" expanded="true" height="82" name="Split" width="90" x="514" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="TBLUNIQUELRU_ID_TIMESTAMP"/>
<parameter key="split_pattern" value="_"/>
</operator>
<connect from_op="Read Database (2)" from_port="output" to_op="Select Attributes (1)" to_port="example set input"/>
<connect from_op="Select Attributes (1)" from_port="example set output" to_op="Generate Concatenation" to_port="example set input"/>
<connect from_op="Generate Concatenation" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Pivot" to_port="example set input"/>
<connect from_op="Pivot" from_port="example set output" to_op="Split" to_port="example set input"/>
<connect from_op="Split" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="subprocess" compatibility="7.2.001" expanded="true" height="82" name="Rename" width="90" x="179" y="34">
<process expanded="true">
<operator activated="true" class="rename" compatibility="7.2.001" expanded="true" height="82" name="Rename TBLUNIQUELRU_ID" width="90" x="45" y="34">
<parameter key="old_name" value="TBLUNIQUELRU_ID_TIMESTAMP_1"/>
<parameter key="new_name" value="TBLUNIQUELRU_ID"/>
<list key="rename_additional_attributes">
<parameter key="TBLUNIQUELRU_ID_TIMESTAMP_2" value="TIMESTAMP"/>
</list>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.2.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="179" y="187">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="TBLUNIQUELRU_ID_TIMESTAMP_3"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="true" class="nominal_to_date" compatibility="7.2.001" expanded="true" height="82" name="Nominal to Date" width="90" x="313" y="187">
<parameter key="attribute_name" value="TIMESTAMP"/>
<parameter key="date_type" value="date_time"/>
<parameter key="date_format" value="d/M/yyyy H:m:s"/>
<parameter key="time_zone" value="CET"/>
<parameter key="locale" value="German (Germany)"/>
</operator>
<operator activated="true" breakpoints="after" class="date_to_numerical" compatibility="7.2.001" expanded="true" height="82" name="Date to Numerical" width="90" x="447" y="187">
<parameter key="attribute_name" value="TIMESTAMP"/>
<parameter key="time_unit" value="minute"/>
<parameter key="minute_relative_to" value="day"/>
<parameter key="day_relative_to" value="year"/>
</operator>
<operator activated="true" class="rename_by_replacing" compatibility="7.2.001" expanded="true" height="82" name="Rename Att. EVENT_" width="90" x="581" y="34">
<parameter key="regular_expression" value="EVENT_"/>
<parameter key="replace_what" value="EVENT_"/>
</operator>
<operator activated="true" class="rename_by_replacing" compatibility="7.2.001" expanded="true" height="82" name="Rename Attr. .0" width="90" x="715" y="34">
<parameter key="attribute_filter_type" value="regular_expression"/>
<parameter key="regular_expression" value="[0-9]*[.][0]"/>
<parameter key="replace_what" value="[.]0"/>
</operator>
<operator activated="true" class="replace" compatibility="7.2.001" expanded="true" height="82" name="Replace [A-Z]" width="90" x="849" y="187">
<parameter key="regular_expression" value="' '[0-9][.][0-9]"/>
<parameter key="replace_what" value="[A-Z]+"/>
<parameter key="replace_by" value="Y"/>
</operator>
<connect from_port="in 1" to_op="Rename TBLUNIQUELRU_ID" to_port="example set input"/>
<connect from_op="Rename TBLUNIQUELRU_ID" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
<connect from_op="Select Attributes (3)" from_port="example set output" to_op="Nominal to Date" to_port="example set input"/>
<connect from_op="Nominal to Date" from_port="example set output" to_op="Date to Numerical" to_port="example set input"/>
<connect from_op="Date to Numerical" from_port="example set output" to_op="Rename Att. EVENT_" to_port="example set input"/>
<connect from_op="Rename Att. EVENT_" from_port="example set output" to_op="Rename Attr. .0" to_port="example set input"/>
<connect from_op="Rename Attr. .0" from_port="example set output" to_op="Replace [A-Z]" to_port="example set input"/>
<connect from_op="Replace [A-Z]" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="replace_missing_values" compatibility="7.2.001" expanded="true" height="103" name="Replace Missing Values (2)" width="90" x="313" y="34">
<parameter key="create_view" value="true"/>
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value="TBLUNIQUELRU_ID"/>
<parameter key="attributes" value="TIMESTAMP|TBLUNIQUELRU_ID"/>
<parameter key="invert_selection" value="true"/>
<parameter key="default" value="value"/>
<list key="columns"/>
<parameter key="replenishment_value" value="N"/>
</operator>
<operator activated="true" class="nominal_to_binominal" compatibility="7.2.001" expanded="true" height="103" name="Nominal to Binominal" width="90" x="447" y="34">
<parameter key="create_view" value="true"/>
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="Infinity|95|94|93|92|91|88|87|86|85|84|83|81|80|77|76|75|68|67|66|63|62|54|53|52|506|505|502|501|475|43|42|41|40|39|37|36|35|34|33|32|26|25|23|19|17|140|132|129|128|127|126|125|124|123|121|120|110|11|107|106"/>
<parameter key="regular_expression" value="[0-9]+"/>
</operator>
<connect from_op="Pivot data" from_port="out 1" to_op="Rename" to_port="in 1"/>
<connect from_op="Rename" from_port="out 1" to_op="Replace Missing Values (2)" to_port="example set input"/>
<connect from_op="Replace Missing Values (2)" from_port="example set output" to_op="Nominal to Binominal" to_port="example set input"/>
<connect from_op="Nominal to Binominal" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="generalized_sequential_patterns" compatibility="7.2.001" expanded="true" height="82" name="GSP" width="90" x="313" y="34">
<parameter key="customer_id" value="TBLUNIQUELRU_ID"/>
<parameter key="time_attribute" value="TIMESTAMP"/>
<parameter key="min_support" value="0.8"/>
<parameter key="window_size" value="5.0"/>
<parameter key="max_gap" value="50.0"/>
<parameter key="min_gap" value="10.0"/>
<parameter key="positive_value" value="Y"/>
</operator>
<operator activated="true" class="create_association_rules" compatibility="7.2.001" expanded="true" height="82" name="Create Association Rules" width="90" x="514" y="136"/>
<connect from_op="Data Preparation" from_port="out 1" to_op="GSP" to_port="example set"/>
<connect from_op="GSP" from_port="patterns" to_op="Create Association Rules" to_port="item sets"/>
<connect from_op="Create Association Rules" from_port="rules" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
The GSP input port reports that one attribute is missing, which is not the case; it is one of RapidMiner's warnings that can be ignored.
Can someone help? The GSP tutorial in the Operator Reference Manual doesn't use the Create Association Rules operator, which is necessary, isn't it?
Kind regards,
Ina
Best Answer
-
Thomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,761 Unicorn
The problem is that you're trying to pass the PAT port (GSP) to the ITE port (Create Association Rules).
You will need to put an FP-Growth operator on the EXA output port of the GSP operator and then connect it to Create Association Rules.
Like so:
<?xml version="1.0" encoding="UTF-8"?><process version="7.4.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="7.4.000" expanded="true" height="82" name="Data Preparation" width="90" x="179" y="34">
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="7.4.000" expanded="true" height="82" name="Pivot data" width="90" x="45" y="34">
<process expanded="true">
<operator activated="false" class="jdbc_connectors:read_database" compatibility="7.4.000" expanded="true" height="68" name="Read Database" width="90" x="246" y="646">
  <!-- Deactivated reader (activated="false"). The double quotes and the less-than sign in the query are entity-escaped so the attribute value is well-formed XML. -->
  <parameter key="connection" value="DB_NAME"/>
  <parameter key="query" value="SELECT * FROM &quot;ELAT&quot;.&quot;TBLBIT&quot; WHERE ROWNUM &lt; 1000"/>
  <enumeration key="parameters"/>
</operator>
<operator activated="false" class="jdbc_connectors:stream_database" compatibility="7.4.000" expanded="true" height="68" name="Stream Database" width="90" x="380" y="595">
<parameter key="connection" value="DB_NAME"/>
<parameter key="table_name" value="ZZ_RM_ASS"/>
<parameter key="recreate_index" value="true"/>
</operator>
<operator activated="true" class="jdbc_connectors:read_database" compatibility="7.4.000" expanded="true" height="68" name="Read Database (2)" width="90" x="45" y="34">
  <!-- Active reader whose output feeds Select Attributes (1). The double quotes and the less-than sign in the query are entity-escaped so the attribute value is well-formed XML. -->
  <parameter key="connection" value="DB_NAME"/>
  <parameter key="query" value="SELECT * FROM &quot;ELATAPEX&quot;.&quot;ZZ_RM_ASS&quot; WHERE ROWNUM &lt; 10000 "/>
  <enumeration key="parameters"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.4.000" expanded="true" height="82" name="Select Attributes (1)" width="90" x="179" y="34">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="BITID|TBLUNIQUELRU_ID|TIMESTAMP|EVENT"/>
</operator>
<operator activated="true" class="generate_concatenation" compatibility="7.4.000" expanded="true" height="82" name="Generate Concatenation" width="90" x="179" y="136">
<parameter key="first_attribute" value="TBLUNIQUELRU_ID"/>
<parameter key="second_attribute" value="TIMESTAMP"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.4.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="179" y="238">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="BITID|EVENT|TBLUNIQUELRU_ID_TIMESTAMP"/>
</operator>
<operator activated="true" class="pivot" compatibility="7.4.000" expanded="true" height="82" name="Pivot" width="90" x="380" y="34">
<parameter key="group_attribute" value="TBLUNIQUELRU_ID_TIMESTAMP"/>
<parameter key="index_attribute" value="BITID"/>
<parameter key="consider_weights" value="false"/>
<parameter key="skip_constant_attributes" value="false"/>
<parameter key="datamanagement" value="float_array"/>
</operator>
<operator activated="true" class="split" compatibility="7.4.000" expanded="true" height="82" name="Split" width="90" x="514" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="TBLUNIQUELRU_ID_TIMESTAMP"/>
<parameter key="split_pattern" value="_"/>
</operator>
<connect from_op="Read Database (2)" from_port="output" to_op="Select Attributes (1)" to_port="example set input"/>
<connect from_op="Select Attributes (1)" from_port="example set output" to_op="Generate Concatenation" to_port="example set input"/>
<connect from_op="Generate Concatenation" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Pivot" to_port="example set input"/>
<connect from_op="Pivot" from_port="example set output" to_op="Split" to_port="example set input"/>
<connect from_op="Split" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="subprocess" compatibility="7.4.000" expanded="true" height="82" name="Rename" width="90" x="179" y="34">
<process expanded="true">
<operator activated="true" class="rename" compatibility="7.4.000" expanded="true" height="82" name="Rename TBLUNIQUELRU_ID" width="90" x="45" y="34">
<parameter key="old_name" value="TBLUNIQUELRU_ID_TIMESTAMP_1"/>
<parameter key="new_name" value="TBLUNIQUELRU_ID"/>
<list key="rename_additional_attributes">
<parameter key="TBLUNIQUELRU_ID_TIMESTAMP_2" value="TIMESTAMP"/>
</list>
</operator>
<operator activated="true" class="select_attributes" compatibility="7.4.000" expanded="true" height="82" name="Select Attributes (3)" width="90" x="179" y="187">
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="TBLUNIQUELRU_ID_TIMESTAMP_3"/>
<parameter key="invert_selection" value="true"/>
</operator>
<operator activated="true" class="nominal_to_date" compatibility="7.4.000" expanded="true" height="82" name="Nominal to Date" width="90" x="313" y="187">
<parameter key="attribute_name" value="TIMESTAMP"/>
<parameter key="date_type" value="date_time"/>
<parameter key="date_format" value="d/M/yyyy H:m:s"/>
<parameter key="time_zone" value="CET"/>
<parameter key="locale" value="German (Germany)"/>
</operator>
<operator activated="true" breakpoints="after" class="date_to_numerical" compatibility="7.4.000" expanded="true" height="82" name="Date to Numerical" width="90" x="447" y="187">
<parameter key="attribute_name" value="TIMESTAMP"/>
<parameter key="time_unit" value="minute"/>
<parameter key="minute_relative_to" value="day"/>
<parameter key="day_relative_to" value="year"/>
</operator>
<operator activated="true" class="rename_by_replacing" compatibility="7.4.000" expanded="true" height="82" name="Rename Att. EVENT_" width="90" x="581" y="34">
<parameter key="regular_expression" value="EVENT_"/>
<parameter key="replace_what" value="EVENT_"/>
</operator>
<operator activated="true" class="rename_by_replacing" compatibility="7.4.000" expanded="true" height="82" name="Rename Attr. .0" width="90" x="715" y="34">
<parameter key="attribute_filter_type" value="regular_expression"/>
<parameter key="regular_expression" value="[0-9]*[.][0]"/>
<parameter key="replace_what" value="[.]0"/>
</operator>
<operator activated="true" class="replace" compatibility="7.4.000" expanded="true" height="82" name="Replace [A-Z]" width="90" x="849" y="187">
<parameter key="regular_expression" value="' '[0-9][.][0-9]"/>
<parameter key="replace_what" value="[A-Z]+"/>
<parameter key="replace_by" value="Y"/>
</operator>
<connect from_port="in 1" to_op="Rename TBLUNIQUELRU_ID" to_port="example set input"/>
<connect from_op="Rename TBLUNIQUELRU_ID" from_port="example set output" to_op="Select Attributes (3)" to_port="example set input"/>
<connect from_op="Select Attributes (3)" from_port="example set output" to_op="Nominal to Date" to_port="example set input"/>
<connect from_op="Nominal to Date" from_port="example set output" to_op="Date to Numerical" to_port="example set input"/>
<connect from_op="Date to Numerical" from_port="example set output" to_op="Rename Att. EVENT_" to_port="example set input"/>
<connect from_op="Rename Att. EVENT_" from_port="example set output" to_op="Rename Attr. .0" to_port="example set input"/>
<connect from_op="Rename Attr. .0" from_port="example set output" to_op="Replace [A-Z]" to_port="example set input"/>
<connect from_op="Replace [A-Z]" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="replace_missing_values" compatibility="7.4.000" expanded="true" height="103" name="Replace Missing Values (2)" width="90" x="313" y="34">
<parameter key="create_view" value="true"/>
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attribute" value="TBLUNIQUELRU_ID"/>
<parameter key="attributes" value="TIMESTAMP|TBLUNIQUELRU_ID"/>
<parameter key="invert_selection" value="true"/>
<parameter key="default" value="value"/>
<list key="columns"/>
<parameter key="replenishment_value" value="N"/>
</operator>
<operator activated="true" class="nominal_to_binominal" compatibility="7.4.000" expanded="true" height="103" name="Nominal to Binominal" width="90" x="447" y="34">
<parameter key="create_view" value="true"/>
<parameter key="attribute_filter_type" value="subset"/>
<parameter key="attributes" value="Infinity|95|94|93|92|91|88|87|86|85|84|83|81|80|77|76|75|68|67|66|63|62|54|53|52|506|505|502|501|475|43|42|41|40|39|37|36|35|34|33|32|26|25|23|19|17|140|132|129|128|127|126|125|124|123|121|120|110|11|107|106"/>
<parameter key="regular_expression" value="[0-9]+"/>
</operator>
<connect from_op="Pivot data" from_port="out 1" to_op="Rename" to_port="in 1"/>
<connect from_op="Rename" from_port="out 1" to_op="Replace Missing Values (2)" to_port="example set input"/>
<connect from_op="Replace Missing Values (2)" from_port="example set output" to_op="Nominal to Binominal" to_port="example set input"/>
<connect from_op="Nominal to Binominal" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="generalized_sequential_patterns" compatibility="7.4.000" expanded="true" height="82" name="GSP" width="90" x="313" y="34">
<parameter key="customer_id" value="TBLUNIQUELRU_ID"/>
<parameter key="time_attribute" value="TIMESTAMP"/>
<parameter key="min_support" value="0.8"/>
<parameter key="window_size" value="5.0"/>
<parameter key="max_gap" value="50.0"/>
<parameter key="min_gap" value="10.0"/>
<parameter key="positive_value" value="Y"/>
</operator>
<operator activated="true" class="fp_growth" compatibility="7.4.000" expanded="true" height="82" name="FP-Growth" width="90" x="447" y="34"/>
<operator activated="true" class="create_association_rules" compatibility="7.4.000" expanded="true" height="82" name="Create Association Rules" width="90" x="648" y="34"/>
<!-- Top-level wiring: Data Preparation feeds GSP; GSP's example set output feeds FP-Growth; FP-Growth's frequent sets output feeds the item sets input of Create Association Rules; the rules are delivered as the process result. -->
<connect from_op="Data Preparation" from_port="out 1" to_op="GSP" to_port="example set"/>
<connect from_op="GSP" from_port="example set" to_op="FP-Growth" to_port="example set"/>
<connect from_op="FP-Growth" from_port="frequent sets" to_op="Create Association Rules" to_port="item sets"/>
<connect from_op="Create Association Rules" from_port="rules" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
1
Answers
Hi,
Generate Ass. Rules is usually used with FP-Growth. I think it does not make sense to use it with GSP.
~Martin
Dortmund, Germany
Hello Thomas,
Hello mschmitz,
Thanks a lot! I incorporated FP-Growth and voilà, the rules were generated.
That was (and still is) missing from the operator reference. From what I understand of sequential pattern analysis, the identified frequent sequences need to be put into the form of association rules as well (?).
Regards,
Ina