"Loop over all combination of 2 attributes"

MuehliMan · July 2010

Hello again,

I am again asking the community for help, as my wisdom has come to an end here. Here is a short description of what the code should do:
read input --> preprocessing + data preparation --> loop over all combinations of 2 attributes --> build and evaluate a decision tree for each combination --> write attributes and feature names to the log and then to CSV

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.0.8" expanded="true" name="Process">
<process expanded="true" height="566" width="1619">
<operator activated="true" class="read_csv" compatibility="5.0.8" expanded="true" height="60" name="Read CSV" width="90" x="45" y="120">
<parameter key="file_name" value="E:\binary_preprocessed.csv"/>
<parameter key="comment_characters" value="*"/>
<parameter key="column_separators" value=","/>
</operator>
<operator activated="true" class="set_role" compatibility="5.0.8" expanded="true" height="76" name="Set Role" width="90" x="179" y="120">
<parameter key="name" value="ID"/>
<parameter key="target_role" value="id"/>
</operator>
<operator activated="true" class="set_role" compatibility="5.0.8" expanded="true" height="76" name="Set Role (2)" width="90" x="313" y="120">
<parameter key="name" value="activity"/>
<parameter key="target_role" value="label"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="5.0.8" expanded="true" height="76" name="only numeric" width="90" x="447" y="120">
<parameter key="attribute_filter_type" value="value_type"/>
<parameter key="regular_expression" value="pKa1ACD10|pKa2ACD10"/>
<parameter key="value_type" value="numeric"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="5.0.8" expanded="true" height="76" name="Filter Examples" width="90" x="581" y="120">
<parameter key="condition_class" value="no_missing_attributes"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="5.0.8" expanded="true" height="76" name="Filter Examples (2)" width="90" x="715" y="120">
<parameter key="condition_class" value="no_missing_labels"/>
</operator>
<operator activated="true" class="loop_attribute_subsets" compatibility="5.0.8" expanded="true" height="60" name="Loop Subsets" width="90" x="849" y="120">
<parameter key="use_exact_number" value="true"/>
<parameter key="exact_number_of_attributes" value="2"/>
<parameter key="max_number_of_attributes" value="5"/>
<process expanded="true" height="665" width="1094">
<operator activated="true" class="extract_macro" compatibility="5.0.8" expanded="true" height="60" name="Extract Macro" width="90" x="45" y="30">
<parameter key="macro" value="atts"/>
<parameter key="macro_type" value="number_of_attributes"/>
</operator>
<operator activated="true" class="generate_macro" compatibility="5.0.8" expanded="true" height="76" name="treedepth" width="90" x="179" y="30">
<list key="function_descriptions">
<parameter key="treedepth" value="2 * %{atts} + 1"/>
</list>
</operator>
<operator activated="true" class="x_validation" compatibility="5.0.8" expanded="true" height="112" name="Validation" width="90" x="313" y="30">
<parameter key="use_local_random_seed" value="true"/>
<parameter key="local_random_seed" value="10"/>
<process expanded="true" height="647" width="424">
<operator activated="true" class="decision_tree" compatibility="5.0.8" expanded="true" height="76" name="Decision Tree" width="90" x="112" y="30">
<parameter key="criterion" value="information_gain"/>
</operator>
<connect from_port="training" to_op="Decision Tree" to_port="training set"/>
<connect from_op="Decision Tree" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true" height="647" width="424">
<operator activated="true" class="apply_model" compatibility="5.0.8" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
<list key="application_parameters"/>
<parameter key="create_view" value="true"/>
</operator>
<operator activated="true" class="performance_binominal_classification" compatibility="5.0.8" expanded="true" height="76" name="Performance" width="90" x="179" y="30">
<parameter key="main_criterion" value="youden"/>
<parameter key="youden" value="true"/>
<parameter key="psep" value="true"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="log" compatibility="5.0.8" expanded="true" height="76" name="Log" width="90" x="447" y="30">
<parameter key="filename" value="fs_2_atts.log"/>
<list key="log">
<parameter key="youden" value="operator.Validation.value.performance"/>
<parameter key="psep" value="operator.Validation.value.performance2"/>
<parameter key="accuracy" value="operator.Validation.value.performance3"/>
<parameter key="feature_names" value="operator.Loop Subsets.value.feature_names"/>
<parameter key="feature_number" value="operator.Loop Subsets.value.feature_number"/>
<parameter key="deviation" value="operator.Validation.value.deviation"/>
</list>
<parameter key="sorting_type" value="top-k"/>
<parameter key="sorting_dimension" value="youden"/>
</operator>
<connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
<connect from_op="Extract Macro" from_port="example set" to_op="treedepth" to_port="through 1"/>
<connect from_op="treedepth" from_port="through 1" to_op="Validation" to_port="training"/>
<connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
<portSpacing port="source_example set" spacing="0"/>
</process>
</operator>
<operator activated="true" class="log_to_data" compatibility="5.0.8" expanded="true" height="94" name="Log to Data (2)" width="90" x="983" y="120"/>
<!-- Writes the example set arriving on its input port (connected from "Log to Data (2)") to a CSV file. -->
<!-- NOTE(review): the file name uses the macros %{path}, %{set} and %{subset}, but this process
     defines none of them (its <macros/> context is empty and it contains no Set Macro operator),
     so they presumably stay unresolved at run time. Confirm, then define them or use a literal path. -->
<operator activated="true" class="write_csv" compatibility="5.0.8" expanded="true" height="60" name="Write CSV" width="90" x="1117" y="120">
<parameter key="csv_file" value="%{path}\%{set}_%{subset}_fs_2_atts.csv"/>
</operator>
<connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
<connect from_op="Set Role (2)" from_port="example set output" to_op="only numeric" to_port="example set input"/>
<connect from_op="only numeric" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
<connect from_op="Filter Examples (2)" from_port="example set output" to_op="Loop Subsets" to_port="example set"/>
<connect from_op="Loop Subsets" from_port="example set" to_op="Log to Data (2)" to_port="through 1"/>
<connect from_op="Log to Data (2)" from_port="exampleSet" to_op="Write CSV" to_port="input"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
</process>

But unfortunately it stops after some time, crashing the whole program. I don't know whether I made an error in the workflow or whether it is using too much memory. Maybe some of you can give me a tip.

MuehliMan · July 2010

I just want to add that it freezes the process timer for very long periods. So maybe it is just using so much memory that there is rarely any left for the program itself. Is there any option to use only 90% of the available memory, to keep something for the program itself?

Cheers,
Markus

land · July 2010

Hi Markus,
this is not possible; otherwise we would have done so a long time ago.

I think you will need either more memory or test if there's some problem with the tree itself.
By the way: You are generating a tree_depth macro, but do not use it for the tree construction...

Greetings,
Sebastian

MuehliMan · July 2010

Hey Sebastian,

You are right. Treedepth is not needed, as it is always 2 attributes. I think it is a bad idea to write/store the log at every iteration, right? Would free memory help, as it deletes all models and views?
How do I get the log updated within the loop and written only after the iteration is finished?

Exception in thread "AWT-EventQueue-0" java.lang.OutOfMemoryError: GC overhead limit exceeded
Exception in thread "AWT-EventQueue-0" java.lang.ArrayIndexOutOfBoundsException

Overall this message does not sound too good. It does not appear from the beginning, but after some time, most likely when the process timer freezes completely.

Cheers,
Markus

land · July 2010

Hi Markus,
indeed this message does not sound good. But as far as I saw, your attributes are numerical, aren't they? That's important, because the tree can use numerical attributes for multiple splits. Thus the depth can in the worst case be equal to the number of examples, and this would definitely result in an out of memory exception...

The free memory operator will not free anything that would not have been freed before java throws an out of memory exception.

Greetings,
Sebastian

MuehliMan · July 2010

Dear Sebastian,

Yes, my attributes are numerical. To avoid a tree putting just one example into one split, I intend to use the minimum examples per split feature, and I set the maximum tree depth lower.
I tried to modify the workflow by writing the log not to the hard drive but to memory, but it did not change much. Here is the version I am working with at the moment.

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.0.8" expanded="true" name="Process">
<parameter key="logverbosity" value="3"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="1"/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="parallelize_main_process" value="false"/>
<process expanded="true" height="566" width="1619">
<operator activated="true" class="set_macro" compatibility="5.0.8" expanded="true" height="76" name="Set Macro" width="90" x="45" y="30">
<parameter key="macro" value="path"/>
<parameter key="value" value="C:\Daten"/>
</operator>
<operator activated="true" class="set_macro" compatibility="5.0.8" expanded="true" height="76" name="Set Macro (2)" width="90" x="179" y="30">
<parameter key="macro" value="set"/>
<parameter key="value" value="ASM"/>
</operator>
<operator activated="true" class="set_macro" compatibility="5.0.8" expanded="true" height="76" name="Set Macro (3)" width="90" x="313" y="30">
<parameter key="macro" value="subset"/>
<parameter key="value" value="full"/>
</operator>
<!-- NOTE(review): sets the macro "subset" to "full", exactly as "Set Macro (3)" does directly before it
     in the same chain, so this second definition looks redundant. It is still the target of a connect
     from "Set Macro (3)", so removing it means removing that connection as well. -->
<operator activated="true" class="set_macro" compatibility="5.0.8" expanded="true" height="76" name="Set Macro (4)" width="90" x="447" y="30">
<parameter key="macro" value="subset"/>
<parameter key="value" value="full"/>
</operator>
<!-- Reads %{path}\input_preprocessed.csv: comma-separated values, attribute names taken from the first row,
     "*" configured as the comment character, "." as the decimal character. -->
<operator activated="true" class="read_csv" compatibility="5.0.8" expanded="true" height="60" name="Read CSV" width="90" x="45" y="120">
<parameter key="file_name" value="%{path}\input_preprocessed.csv"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="trim_lines" value="false"/>
<parameter key="skip_comments" value="true"/>
<parameter key="comment_characters" value="*"/>
<parameter key="use_first_row_as_attribute_names" value="true"/>
<parameter key="use_quotes" value="true"/>
<!-- The quote character is a double quote. Inside a double-quoted attribute value it has to be written
     as the entity &quot;; a raw double quote here makes the whole process file ill-formed XML. -->
<parameter key="quotes_character" value="&quot;"/>
<parameter key="column_separators" value=","/>
<parameter key="parse_numbers" value="true"/>
<parameter key="decimal_character" value="."/>
<parameter key="grouped_digits" value="false"/>
<parameter key="grouping_character" value=","/>
<parameter key="date_format" value="yyyy-MM-dd"/>
</operator>
<operator activated="true" class="set_role" compatibility="5.0.8" expanded="true" height="76" name="Set Role" width="90" x="179" y="120">
<parameter key="name" value="CID"/>
<parameter key="target_role" value="id"/>
</operator>
<operator activated="true" class="set_role" compatibility="5.0.8" expanded="true" height="76" name="Set Role (2)" width="90" x="313" y="120">
<parameter key="name" value="activity"/>
<parameter key="target_role" value="label"/>
</operator>
<operator activated="true" class="select_attributes" compatibility="5.0.8" expanded="true" height="76" name="only numeric" width="90" x="447" y="120">
<parameter key="attribute_filter_type" value="value_type"/>
<parameter key="attribute" value=""/>
<parameter key="regular_expression" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="numeric"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="11"/>
<parameter key="block_type" value="0"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="8"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="5.0.8" expanded="true" height="76" name="Filter Examples" width="90" x="581" y="120">
<parameter key="condition_class" value="no_missing_attributes"/>
<parameter key="invert_filter" value="false"/>
</operator>
<operator activated="true" class="filter_examples" compatibility="5.0.8" expanded="true" height="76" name="Filter Examples (2)" width="90" x="715" y="120">
<parameter key="condition_class" value="no_missing_labels"/>
<parameter key="invert_filter" value="false"/>
</operator>
<operator activated="true" class="loop_attribute_subsets" compatibility="5.0.8" expanded="true" height="60" name="Loop Subsets" width="90" x="849" y="120">
<parameter key="use_exact_number" value="true"/>
<parameter key="exact_number_of_attributes" value="2"/>
<parameter key="min_number_of_attributes" value="1"/>
<parameter key="limit_max_number" value="false"/>
<parameter key="max_number_of_attributes" value="5"/>
<parameter key="parallelize_subprocess" value="false"/>
<process expanded="true" height="665" width="1094">
<operator activated="true" class="extract_macro" compatibility="5.0.8" expanded="true" height="60" name="Extract Macro" width="90" x="45" y="30">
<parameter key="macro" value="atts"/>
<parameter key="macro_type" value="number_of_attributes"/>
<parameter key="statistics" value="0"/>
<parameter key="attribute_name" value=""/>
</operator>
<operator activated="false" class="generate_macro" compatibility="5.0.8" expanded="true" height="60" name="treedepth" width="90" x="179" y="30">
<list key="function_descriptions">
<parameter key="treedepth" value="2 * %{atts} + 1"/>
</list>
<parameter key="use_standard_constants" value="true"/>
</operator>
<operator activated="true" class="x_validation" compatibility="5.0.8" expanded="true" height="112" name="Validation" width="90" x="313" y="30">
<parameter key="create_complete_model" value="false"/>
<parameter key="average_performances_only" value="true"/>
<parameter key="leave_one_out" value="false"/>
<parameter key="number_of_validations" value="10"/>
<parameter key="sampling_type" value="2"/>
<parameter key="use_local_random_seed" value="true"/>
<parameter key="local_random_seed" value="10"/>
<parameter key="parallelize_training" value="false"/>
<parameter key="parallelize_testing" value="false"/>
<process expanded="true" height="647" width="424">
<!-- Learner used in the training side of the cross-validation; split criterion is information gain. -->
<operator activated="true" class="decision_tree" compatibility="5.0.8" expanded="true" height="76" name="Decision Tree" width="90" x="112" y="30">
<parameter key="criterion" value="information_gain"/>
<parameter key="minimal_size_for_split" value="1"/>
<parameter key="minimal_leaf_size" value="1"/>
<parameter key="minimal_gain" value="0.05"/>
<!-- Was %{treedepth}, but the "treedepth" Generate Macro operator in the enclosing loop is deactivated
     and not connected, so that macro is never defined. Its formula was 2 * %{atts} + 1, and Loop Subsets
     hands over exactly 2 attributes per iteration, so the resulting depth of 5 is written out directly. -->
<parameter key="maximal_depth" value="5"/>
<parameter key="confidence" value="0.25"/>
<parameter key="number_of_prepruning_alternatives" value="3"/>
<parameter key="no_pre_pruning" value="false"/>
<parameter key="no_pruning" value="false"/>
</operator>
<connect from_port="training" to_op="Decision Tree" to_port="training set"/>
<connect from_op="Decision Tree" from_port="model" to_port="model"/>
<portSpacing port="source_training" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true" height="647" width="424">
<operator activated="true" class="apply_model" compatibility="5.0.8" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
<list key="application_parameters"/>
<parameter key="create_view" value="true"/>
</operator>
<operator activated="true" class="performance_binominal_classification" compatibility="5.0.8" expanded="true" height="76" name="Performance" width="90" x="179" y="30">
<parameter key="main_criterion" value="youden"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="false"/>
<parameter key="kappa" value="false"/>
<parameter key="AUC (optimistic)" value="false"/>
<parameter key="AUC" value="false"/>
<parameter key="AUC (pessimistic)" value="false"/>
<parameter key="precision" value="false"/>
<parameter key="recall" value="false"/>
<parameter key="lift" value="false"/>
<parameter key="fallout" value="false"/>
<parameter key="f_measure" value="false"/>
<parameter key="false_positive" value="false"/>
<parameter key="false_negative" value="false"/>
<parameter key="true_positive" value="false"/>
<parameter key="true_negative" value="false"/>
<parameter key="sensitivity" value="false"/>
<parameter key="specificity" value="false"/>
<parameter key="youden" value="true"/>
<parameter key="positive_predictive_value" value="false"/>
<parameter key="negative_predictive_value" value="false"/>
<parameter key="psep" value="true"/>
<parameter key="skip_undefined_labels" value="false"/>
<parameter key="use_example_weights" value="true"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_averagable 1" spacing="0"/>
<portSpacing port="sink_averagable 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="log" compatibility="5.0.8" expanded="true" height="76" name="Log" width="90" x="447" y="30">
<parameter key="filename" value="%{path}\%{set}_%{subset}_fs_2_atts.log"/>
<list key="log">
<parameter key="youden" value="operator.Validation.value.performance"/>
<parameter key="psep" value="operator.Validation.value.performance2"/>
<parameter key="accuracy" value="operator.Validation.value.performance3"/>
<parameter key="feature_names" value="operator.Loop Subsets.value.feature_names"/>
<parameter key="feature_number" value="operator.Loop Subsets.value.feature_number"/>
<parameter key="deviation" value="operator.Validation.value.deviation"/>
</list>
<parameter key="sorting_type" value="top-k"/>
<parameter key="sorting_dimension" value="youden"/>
<parameter key="sorting_k" value="100"/>
<parameter key="persistent" value="false"/>
</operator>
<operator activated="true" class="log_to_data" compatibility="5.0.8" expanded="true" height="94" name="Log to Data (2)" width="90" x="581" y="30"/>
<operator activated="true" class="remember" compatibility="5.0.8" expanded="true" height="60" name="Remember" width="90" x="715" y="30">
<parameter key="name" value="stack"/>
<parameter key="io_object" value="ExampleSet"/>
<parameter key="store_which" value="1"/>
<parameter key="remove_from_process" value="true"/>
</operator>
<operator activated="true" class="free_memory" compatibility="5.0.8" expanded="true" height="76" name="Free Memory" width="90" x="849" y="30"/>
<connect from_port="example set" to_op="Extract Macro" to_port="example set"/>
<connect from_op="Extract Macro" from_port="example set" to_op="Validation" to_port="training"/>
<connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
<connect from_op="Log" from_port="through 1" to_op="Log to Data (2)" to_port="through 1"/>
<connect from_op="Log to Data (2)" from_port="exampleSet" to_op="Remember" to_port="store"/>
<connect from_op="Remember" from_port="stored" to_op="Free Memory" to_port="through 1"/>
<portSpacing port="source_example set" spacing="0"/>
</process>
</operator>
<operator activated="true" class="recall" compatibility="5.0.8" expanded="true" height="60" name="Recall" width="90" x="45" y="345">
<parameter key="name" value="stack"/>
<parameter key="io_object" value="ExampleSet"/>
<parameter key="remove_from_store" value="true"/>
</operator>
<!-- Writes the example set recalled from the "stack" store (connected from "Recall") to an Excel file. -->
<operator activated="true" class="write_excel" compatibility="5.0.8" expanded="true" height="60" name="Write Excel" width="90" x="179" y="345">
<!-- The macro is defined as "path" (lower case) by "Set Macro"; the former %{Path} did not match that name. -->
<parameter key="excel_file" value="%{path}/fs_2_atts.xls"/>
<parameter key="encoding" value="SYSTEM"/>
</operator>
<connect from_port="input 1" to_op="Set Macro" to_port="through 1"/>
<connect from_op="Set Macro" from_port="through 1" to_op="Set Macro (2)" to_port="through 1"/>
<connect from_op="Set Macro (2)" from_port="through 1" to_op="Set Macro (3)" to_port="through 1"/>
<connect from_op="Set Macro (3)" from_port="through 1" to_op="Set Macro (4)" to_port="through 1"/>
<connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
<connect from_op="Set Role (2)" from_port="example set output" to_op="only numeric" to_port="example set input"/>
<connect from_op="only numeric" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
<connect from_op="Filter Examples (2)" from_port="example set output" to_op="Loop Subsets" to_port="example set"/>
<connect from_op="Recall" from_port="result" to_op="Write Excel" to_port="input"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
</process>

After some runtime the log says for every iteration:

Exception in thread "AWT-EventQueue-0" java.lang.ArrayIndexOutOfBoundsException
at javax.swing.text.BoxView.updateLayoutArray(BoxView.java:196)
at javax.swing.text.BoxView.replace(BoxView.java:168)
at javax.swing.text.View.updateChildren(View.java:1095)
at javax.swing.text.View.insertUpdate(View.java:679)
at javax.swing.plaf.basic.BasicTextUI$RootView.insertUpdate(BasicTextUI.java:1590)
at javax.swing.plaf.basic.BasicTextUI$UpdateHandler.insertUpdate(BasicTextUI.java:1849)
at javax.swing.text.AbstractDocument.fireInsertUpdate(AbstractDocument.java:185)
at javax.swing.text.AbstractDocument.handleInsertString(AbstractDocument.java:734)
at javax.swing.text.AbstractDocument.insertString(AbstractDocument.java:693)
at com.rapidminer.gui.tools.LoggingViewer.append(LoggingViewer.java:300)
at com.rapidminer.gui.tools.LoggingViewer.access$000(LoggingViewer.java:83)
at com.rapidminer.gui.tools.LoggingViewer$2$1.run(LoggingViewer.java:186)
at java.awt.event.InvocationEvent.dispatch(InvocationEvent.java:209)
at java.awt.EventQueue.dispatchEvent(EventQueue.java:597)
at java.awt.EventDispatchThread.pumpOneEventForFilters(EventDispatchThread.java:269)
at java.awt.EventDispatchThread.pumpEventsForFilter(EventDispatchThread.java:184)
at java.awt.EventDispatchThread.pumpEventsForHierarchy(EventDispatchThread.java:174)
at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:169)
at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:161)
at java.awt.EventDispatchThread.run(EventDispatchThread.java:122)

I also tried to run it with a lower number of attributes (like 100 random ones created by "Generate Data") and it seems to work there. If someone experienced could help me, please; I just don't know where the mistake is.

Best regards,
Markus

haddock · July 2010

Hi Markus,

I think your code choked the cat because it kept storing extra copies of the log in memory. Here is code that selects all attribute pairs and logs their classification performance, hope that is what you had in mind.

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="">
    <process expanded="true" height="365" width="748">
      <operator activated="true" class="retrieve" compatibility="5.0.8" expanded="true" height="60" name="Retrieve" width="90" x="179" y="75">
        <parameter key="repository_entry" value="//Samples/data/Golf"/>
      </operator>
      <!-- Runs the inner process for the attribute subsets of the incoming example set; the parameters
           restrict it to subsets of exactly 2 attributes, i.e. attribute pairs. -->
      <operator activated="true" class="loop_attribute_subsets" compatibility="5.0.8" expanded="true" height="60" name="Loop Subsets" width="90" x="380" y="75">
        <parameter key="use_exact_number" value="true"/>
        <parameter key="exact_number_of_attributes" value="2"/>
        <process expanded="true" height="380" width="815">
          <operator activated="true" class="decision_tree" compatibility="5.0.8" expanded="true" height="76" name="Decision Tree" width="90" x="112" y="30"/>
          <operator activated="true" class="apply_model" compatibility="5.0.8" expanded="true" height="76" name="Apply Model" width="90" x="313" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.0.8" expanded="true" height="76" name="Performance" width="90" x="447" y="30">
            <list key="class_weights"/>
          </operator>
          <!-- Logs two columns per iteration: the feature names reported by Loop Subsets and the
               accuracy value reported by Performance. No file name is set on this Log operator. -->
          <operator activated="true" class="log" compatibility="5.0.8" expanded="true" height="94" name="Log" width="90" x="581" y="30">
            <list key="log">
              <parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
              <parameter key="Performance" value="operator.Performance.value.accuracy"/>
            </list>
          </operator>
          <connect from_port="example set" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Decision Tree" from_port="model" to_op="Apply Model" to_port="model"/>
          <!-- NOTE(review): Apply Model receives the example set coming out of Decision Tree (presumably the
               unchanged training data), so the accuracy is measured on the examples the tree was trained on;
               there is no cross-validation or hold-out set in this loop. -->
          <connect from_op="Decision Tree" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_op="Log" to_port="through 1"/>
          <connect from_op="Performance" from_port="example set" to_op="Log" to_port="through 2"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Retrieve" from_port="output" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Loop Subsets" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

MuehliMan · July 2010

Thank you for your workflow!

So using a cross-validation (X-Val) I would connect Performance Evaluation with the average output of the validation, and then the output of the X-Validation with the log operator, right?

But how do I get the log written to a file (as data or even better example file) without this being done every single step?

I agree that writing a copy of the log file at every iteration is killing the process. But I would still need the result of this search saved somewhere.

Cheers,
Markus

haddock · July 2010

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="">
    <process expanded="true" height="365" width="748">
      <operator activated="true" class="retrieve" compatibility="5.0.8" expanded="true" height="60" name="Retrieve" width="90" x="179" y="75">
        <parameter key="repository_entry" value="//Samples/data/Golf"/>
      </operator>
      <!-- Runs the inner process for the attribute subsets of the incoming example set; the parameters
           restrict it to subsets of exactly 2 attributes, i.e. attribute pairs. -->
      <operator activated="true" class="loop_attribute_subsets" compatibility="5.0.8" expanded="true" height="60" name="Loop Subsets" width="90" x="380" y="75">
        <parameter key="use_exact_number" value="true"/>
        <parameter key="exact_number_of_attributes" value="2"/>
        <process expanded="true" height="380" width="815">
          <operator activated="true" class="decision_tree" compatibility="5.0.8" expanded="true" height="76" name="Decision Tree" width="90" x="112" y="30"/>
          <operator activated="true" class="apply_model" compatibility="5.0.8" expanded="true" height="76" name="Apply Model" width="90" x="313" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.0.8" expanded="true" height="76" name="Performance" width="90" x="447" y="30">
            <list key="class_weights"/>
          </operator>
          <!-- Logs two columns per iteration: the feature names reported by Loop Subsets and the
               accuracy value reported by Performance. The rows are only collected here; nothing in
               this inner process converts or writes them. -->
          <operator activated="true" class="log" compatibility="5.0.8" expanded="true" height="94" name="Log" width="90" x="581" y="30">
            <list key="log">
              <parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
              <parameter key="Performance" value="operator.Performance.value.accuracy"/>
            </list>
          </operator>
          <connect from_port="example set" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Decision Tree" from_port="model" to_op="Apply Model" to_port="model"/>
          <!-- NOTE(review): Apply Model receives the example set coming out of Decision Tree (presumably the
               unchanged training data), so the accuracy is measured on the examples the tree was trained on;
               there is no cross-validation or hold-out set in this loop. -->
          <connect from_op="Decision Tree" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_op="Log" to_port="through 1"/>
          <connect from_op="Performance" from_port="example set" to_op="Log" to_port="through 2"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>
      <!-- Placed after Loop Subsets in the outer process and fed from the loop's output, so the log is
           turned into an example set at this single point instead of inside every loop iteration.
           Its exampleSet output is delivered as result 1. -->
      <operator activated="true" class="log_to_data" compatibility="5.0.8" expanded="true" height="94" name="Log to Data" width="90" x="571" y="73"/>
      <connect from_op="Retrieve" from_port="output" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Loop Subsets" from_port="example set" to_op="Log to Data" to_port="through 1"/>
      <connect from_op="Log to Data" from_port="exampleSet" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Howdy, Stranger!

Quick Links

Categories

Altair RapidMiner Community

GET HELP. LEARN BEST PRACTICES. NETWORK WITH YOUR PEERS.

"Loop over all combination of 2 attributes"

Answers