The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is in read-only mode from October 28 to November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here.
"Feature selection inside validation loop"
Dear All,
Is it possible to run feature selection inside cross validation?
I created the following process (pasted below) which I believe does exactly this.
But I'm afraid that the remember and recall of feature weights might return feature weights from the previous loop.
Best regards,
Wessel
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 5.1 process: feature selection performed inside each fold of a
  cross validation. Attribute weights computed on the training data are stored
  under the name "f" (Remember) and fetched again in the testing subprocess
  (Recall) so that the test set is reduced with the same weights.
-->
<process version="5.1.006">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.006" expanded="true" name="Process">
    <process expanded="true" height="409" width="678">

      <!-- Load the Sonar sample data set from the repository. -->
      <operator activated="true" class="retrieve" compatibility="5.1.006" expanded="true"
                height="60" name="Retrieve" width="90" x="112" y="75">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>

      <!-- Cross validation configured with 144 validations. -->
      <operator activated="true" class="x_validation" compatibility="5.1.006" expanded="true"
                height="112" name="k-fold" width="90" x="246" y="75">
        <parameter key="number_of_validations" value="144"/>

        <!-- Training subprocess: weight attributes, select by weight, train the model. -->
        <process expanded="true" height="391" width="300">
          <!-- Produces attribute weights from the training example set. -->
          <operator activated="true" class="weka:W-ReliefFAttributeEval" compatibility="5.1.000"
                    expanded="true" height="76" name="W-ReliefFAttributeEval (2)" width="90"
                    x="45" y="30"/>
          <!-- Attribute selection with weight_relation "top k" and k = 5. -->
          <operator activated="true" class="select_by_weights" compatibility="5.1.006"
                    expanded="true" height="94" name="Select by Weights (2)" width="90"
                    x="180" y="30">
            <parameter key="weight_relation" value="top k"/>
            <parameter key="k" value="5"/>
          </operator>
          <!-- Stores the AttributeWeights object under the name "f". -->
          <operator activated="true" class="remember" compatibility="5.1.006" expanded="true"
                    height="60" name="Remember" width="90" x="45" y="120">
            <parameter key="name" value="f"/>
            <parameter key="io_object" value="AttributeWeights"/>
          </operator>
          <!-- Learner trained on the example set output of Select by Weights (2). -->
          <operator activated="true" class="naive_bayes" compatibility="5.1.006" expanded="true"
                    height="76" name="Naive Bayes" width="90" x="179" y="120"/>

          <connect from_port="training" to_op="W-ReliefFAttributeEval (2)" to_port="example set"/>
          <connect from_op="W-ReliefFAttributeEval (2)" from_port="weights"
                   to_op="Select by Weights (2)" to_port="weights"/>
          <connect from_op="W-ReliefFAttributeEval (2)" from_port="example set"
                   to_op="Select by Weights (2)" to_port="example set input"/>
          <connect from_op="Select by Weights (2)" from_port="example set output"
                   to_op="Naive Bayes" to_port="training set"/>
          <connect from_op="Select by Weights (2)" from_port="weights"
                   to_op="Remember" to_port="store"/>
          <connect from_op="Naive Bayes" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <!-- The "through 1" port is left unconnected in this process. -->
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>

        <!-- Testing subprocess: reduce the test set, apply the model, measure performance. -->
        <process expanded="true" height="391" width="300">
          <!--
            Fetches the AttributeWeights object stored under the name "f".
            NOTE(review): this relies on the training subprocess having stored
            the weights for the current fold before Recall runs; confirm.
          -->
          <operator activated="true" class="recall" compatibility="5.1.006" expanded="true"
                    height="60" name="Recall" width="90" x="45" y="30">
            <parameter key="name" value="f"/>
            <parameter key="io_object" value="AttributeWeights"/>
          </operator>
          <!-- Same selection settings as in the training subprocess ("top k", k = 5). -->
          <operator activated="true" class="select_by_weights" compatibility="5.1.006"
                    expanded="true" height="94" name="Select by Weights (3)" width="90"
                    x="180" y="30">
            <parameter key="weight_relation" value="top k"/>
            <parameter key="k" value="5"/>
          </operator>
          <!-- Applies the model received from the training subprocess. -->
          <operator activated="true" class="apply_model" compatibility="5.1.006" expanded="true"
                    height="76" name="-1 Folds Model" width="90" x="45" y="120">
            <list key="application_parameters"/>
          </operator>
          <!-- Classification performance on the labelled test data of this fold. -->
          <operator activated="true" class="performance_classification" compatibility="5.1.006"
                    expanded="true" height="76" name="Accuracy Fold" width="90" x="180" y="120">
            <list key="class_weights"/>
          </operator>

          <connect from_port="model" to_op="-1 Folds Model" to_port="model"/>
          <connect from_port="test set" to_op="Select by Weights (3)" to_port="example set input"/>
          <connect from_op="Recall" from_port="result"
                   to_op="Select by Weights (3)" to_port="weights"/>
          <connect from_op="Select by Weights (3)" from_port="example set output"
                   to_op="-1 Folds Model" to_port="unlabelled data"/>
          <connect from_op="-1 Folds Model" from_port="labelled data"
                   to_op="Accuracy Fold" to_port="labelled data"/>
          <connect from_op="Accuracy Fold" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!-- Top-level wiring: data into the validation, performance out as result 1. -->
      <connect from_op="Retrieve" from_port="output" to_op="k-fold" to_port="training"/>
      <connect from_op="k-fold" from_port="averagable 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="72"/>
      <portSpacing port="sink_result 2" spacing="18"/>
    </process>
  </operator>
</process>
Is it possible to run feature selection inside cross validation?
I created the following process (pasted below) which I believe does exactly this.
But I'm afraid that the remember and recall of feature weights might return feature weights from the previous loop.
Best regards,
Wessel
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 5.1 process: feature selection performed inside each fold of a
  cross validation. Attribute weights computed on the training data are stored
  under the name "f" (Remember) and fetched again in the testing subprocess
  (Recall) so that the test set is reduced with the same weights.
-->
<process version="5.1.006">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.006" expanded="true" name="Process">
    <process expanded="true" height="409" width="678">

      <!-- Load the Sonar sample data set from the repository. -->
      <operator activated="true" class="retrieve" compatibility="5.1.006" expanded="true"
                height="60" name="Retrieve" width="90" x="112" y="75">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>

      <!-- Cross validation configured with 144 validations. -->
      <operator activated="true" class="x_validation" compatibility="5.1.006" expanded="true"
                height="112" name="k-fold" width="90" x="246" y="75">
        <parameter key="number_of_validations" value="144"/>

        <!-- Training subprocess: weight attributes, select by weight, train the model. -->
        <process expanded="true" height="391" width="300">
          <!-- Produces attribute weights from the training example set. -->
          <operator activated="true" class="weka:W-ReliefFAttributeEval" compatibility="5.1.000"
                    expanded="true" height="76" name="W-ReliefFAttributeEval (2)" width="90"
                    x="45" y="30"/>
          <!-- Attribute selection with weight_relation "top k" and k = 5. -->
          <operator activated="true" class="select_by_weights" compatibility="5.1.006"
                    expanded="true" height="94" name="Select by Weights (2)" width="90"
                    x="180" y="30">
            <parameter key="weight_relation" value="top k"/>
            <parameter key="k" value="5"/>
          </operator>
          <!-- Stores the AttributeWeights object under the name "f". -->
          <operator activated="true" class="remember" compatibility="5.1.006" expanded="true"
                    height="60" name="Remember" width="90" x="45" y="120">
            <parameter key="name" value="f"/>
            <parameter key="io_object" value="AttributeWeights"/>
          </operator>
          <!-- Learner trained on the example set output of Select by Weights (2). -->
          <operator activated="true" class="naive_bayes" compatibility="5.1.006" expanded="true"
                    height="76" name="Naive Bayes" width="90" x="179" y="120"/>

          <connect from_port="training" to_op="W-ReliefFAttributeEval (2)" to_port="example set"/>
          <connect from_op="W-ReliefFAttributeEval (2)" from_port="weights"
                   to_op="Select by Weights (2)" to_port="weights"/>
          <connect from_op="W-ReliefFAttributeEval (2)" from_port="example set"
                   to_op="Select by Weights (2)" to_port="example set input"/>
          <connect from_op="Select by Weights (2)" from_port="example set output"
                   to_op="Naive Bayes" to_port="training set"/>
          <connect from_op="Select by Weights (2)" from_port="weights"
                   to_op="Remember" to_port="store"/>
          <connect from_op="Naive Bayes" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <!-- The "through 1" port is left unconnected in this process. -->
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>

        <!-- Testing subprocess: reduce the test set, apply the model, measure performance. -->
        <process expanded="true" height="391" width="300">
          <!--
            Fetches the AttributeWeights object stored under the name "f".
            NOTE(review): this relies on the training subprocess having stored
            the weights for the current fold before Recall runs; confirm.
          -->
          <operator activated="true" class="recall" compatibility="5.1.006" expanded="true"
                    height="60" name="Recall" width="90" x="45" y="30">
            <parameter key="name" value="f"/>
            <parameter key="io_object" value="AttributeWeights"/>
          </operator>
          <!-- Same selection settings as in the training subprocess ("top k", k = 5). -->
          <operator activated="true" class="select_by_weights" compatibility="5.1.006"
                    expanded="true" height="94" name="Select by Weights (3)" width="90"
                    x="180" y="30">
            <parameter key="weight_relation" value="top k"/>
            <parameter key="k" value="5"/>
          </operator>
          <!-- Applies the model received from the training subprocess. -->
          <operator activated="true" class="apply_model" compatibility="5.1.006" expanded="true"
                    height="76" name="-1 Folds Model" width="90" x="45" y="120">
            <list key="application_parameters"/>
          </operator>
          <!-- Classification performance on the labelled test data of this fold. -->
          <operator activated="true" class="performance_classification" compatibility="5.1.006"
                    expanded="true" height="76" name="Accuracy Fold" width="90" x="180" y="120">
            <list key="class_weights"/>
          </operator>

          <connect from_port="model" to_op="-1 Folds Model" to_port="model"/>
          <connect from_port="test set" to_op="Select by Weights (3)" to_port="example set input"/>
          <connect from_op="Recall" from_port="result"
                   to_op="Select by Weights (3)" to_port="weights"/>
          <connect from_op="Select by Weights (3)" from_port="example set output"
                   to_op="-1 Folds Model" to_port="unlabelled data"/>
          <connect from_op="-1 Folds Model" from_port="labelled data"
                   to_op="Accuracy Fold" to_port="labelled data"/>
          <connect from_op="Accuracy Fold" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!-- Top-level wiring: data into the validation, performance out as result 1. -->
      <connect from_op="Retrieve" from_port="output" to_op="k-fold" to_port="training"/>
      <connect from_op="k-fold" from_port="averagable 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="72"/>
      <portSpacing port="sink_result 2" spacing="18"/>
    </process>
  </operator>
</process>
Tagged:
0
Answers
Sure, this is possible. Remember and Recall work in principle, but you do not actually need them at all: you could use the "Through" port instead, as in the process below.
Cheers,
Ingo