The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is in read-only mode from October 28 to November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from students or faculty members, please submit the form linked here.
How to configure cost matrix for MetaCost operator
Hello,
I am struggling to set up a cost matrix correctly for the MetaCost operator. The documentation on it is quite sparse, and even after reading many posts on this forum, I cannot find my answer.
Here is the cost matrix for the default tutorial process for the MetaCost operator (distinguishing mines from rocks in the Sonar dataset):
Class 1 is Rock; Class 2 is Mine.
That image refers to the Matlab cost matrix format (which I think is here: https://www.mathworks.com/help/stats/classification-with-unequal-misclassification-costs.html), but I still have many questions:
- I assume that the 2.0 and 3.0 are costs (penalties) for misclassification, since they are for wrong predictions. The Matlab instructions say that the true positive (TP) and true negative (TN) diagonal is supposed to be left at 0, but this does not make sense to me if I have benefits. Would they not be negative (opposite of costs) in that case?
Here is what I would think:
That is, with "yes" as the positive class:
- True positive: earns 45€, so cost is -45
- True negative: we spend nothing and gain nothing, so cost is 0
- False positive: we spent 5€ to call a customer but gained nothing, so cost is 5
- False negative: we spent nothing, but missed the opportunity of receiving 45€ profit, so cost is 45
So, I would appreciate clear guidance on how to correctly configure the cost matrix.
Regards,
Chitu
Tagged:
0
Answers
Dortmund, Germany
- The documentation note "The cost matrix in Matlab single line format" is very confusing. When I looked up the Matlab documentation (https://www.mathworks.com/help/stats/classification-with-unequal-misclassification-costs.html), it explicitly says, "The diagonal elements C(i,i) of the cost matrix must be 0", which directly contradicts your very intuitive business matrix.
- Your business matrix expresses gains or benefits as positive numbers and costs as negative numbers, but the term "cost matrix" implies that gains or benefits should be expressed as negative numbers and costs as positive numbers. Which is it?
Thanks,
Dortmund, Germany
<!-- Process context: no input, output, or macro entries are defined here. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<!--
  Root "Process" operator. Its subprocess forms a single chain:
  Retrieve prepped data > Nominal to Binominal > Remap Binominals > Set Role > MetaCost,
  and the MetaCost model is delivered to the "result 1" port.
-->
<operator activated="true"
          class="process"
          compatibility="9.8.000"
          expanded="true"
          name="Process">
  <parameter key="logverbosity" value="init"/>
  <parameter key="random_seed" value="2001"/>
  <parameter key="send_mail" value="never"/>
  <parameter key="notification_email" value=""/>
  <parameter key="process_duration_for_mail" value="30"/>
  <parameter key="encoding" value="SYSTEM"/>
  <process expanded="true">

    <!-- Step 1: read the data from the repository entry named below. -->
    <operator activated="true"
              class="retrieve"
              compatibility="9.8.000"
              expanded="true"
              height="68"
              name="Retrieve prepped data"
              width="90"
              x="313"
              y="85">
      <parameter key="repository_entry" value="//Demo Project/Direct Marketing/data/prepped data"/>
    </operator>

    <!-- Step 2: nominal_to_binominal, restricted to the single attribute "Response". -->
    <operator activated="true"
              class="nominal_to_binominal"
              compatibility="9.8.000"
              expanded="true"
              height="103"
              name="Nominal to Binominal"
              width="90"
              x="447"
              y="85">
      <parameter key="return_preprocessing_model" value="false"/>
      <parameter key="create_view" value="false"/>
      <parameter key="attribute_filter_type" value="single"/>
      <parameter key="attribute" value="Response"/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="nominal"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="file_path"/>
      <parameter key="block_type" value="single_value"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="single_value"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="false"/>
      <parameter key="transform_binominal" value="false"/>
      <parameter key="use_underscore_in_name" value="false"/>
    </operator>

    <!--
      Step 3: remap_binominals on "Response", with "No" as the negative value
      and "Yes" as the positive value.
    -->
    <operator activated="true"
              class="remap_binominals"
              compatibility="9.8.000"
              expanded="true"
              height="82"
              name="Remap Binominals"
              width="90"
              x="581"
              y="85">
      <parameter key="attribute_filter_type" value="single"/>
      <parameter key="attribute" value="Response"/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="binominal"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="binominal"/>
      <parameter key="block_type" value="value_matrix_start"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="true"/>
      <parameter key="negative_value" value="No"/>
      <parameter key="positive_value" value="Yes"/>
    </operator>

    <!-- Step 4: give "Response" the target role "label". -->
    <operator activated="true"
              class="set_role"
              compatibility="9.8.000"
              expanded="true"
              height="82"
              name="Set Role"
              width="90"
              x="715"
              y="85">
      <parameter key="attribute_name" value="Response"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
    </operator>

    <!--
      Step 5: MetaCost wrapper around the nested Decision Tree learner.
      cost_matrix is a 2x2 matrix written on one line: entries are separated
      by spaces and rows by ";".
      NOTE(review): this file does not show which axis is the true class and
      which is the predicted class, nor the class order; confirm against the
      MetaCost operator documentation before changing the values.
    -->
    <operator activated="true"
              class="metacost"
              compatibility="9.8.000"
              expanded="true"
              height="82"
              name="MetaCost"
              width="90"
              x="849"
              y="85">
      <parameter key="cost_matrix" value="[-47.0 3.0;10.0 0.0]"/>
      <parameter key="use_subset_for_training" value="1.0"/>
      <parameter key="iterations" value="10"/>
      <parameter key="sampling_with_replacement" value="true"/>
      <parameter key="use_local_random_seed" value="false"/>
      <parameter key="local_random_seed" value="1992"/>
      <process expanded="true">

        <!-- Inner learner: fed from the "training set" port; its model goes to the "model" sink. -->
        <operator activated="true"
                  class="concurrency:parallel_decision_tree"
                  compatibility="9.8.000"
                  expanded="true"
                  height="103"
                  name="Decision Tree"
                  width="90"
                  x="313"
                  y="34">
          <parameter key="criterion" value="gain_ratio"/>
          <parameter key="maximal_depth" value="10"/>
          <parameter key="apply_pruning" value="true"/>
          <parameter key="confidence" value="0.1"/>
          <parameter key="apply_prepruning" value="true"/>
          <parameter key="minimal_gain" value="0.01"/>
          <parameter key="minimal_leaf_size" value="2"/>
          <parameter key="minimal_size_for_split" value="4"/>
          <parameter key="number_of_prepruning_alternatives" value="3"/>
        </operator>

        <connect from_port="training set"
                 to_op="Decision Tree" to_port="training set"/>
        <connect from_op="Decision Tree" from_port="model"
                 to_port="model"/>
        <portSpacing port="source_training set" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
      </process>
    </operator>

    <!-- Wiring of the outer chain, in execution order. -->
    <connect from_op="Retrieve prepped data" from_port="output"
             to_op="Nominal to Binominal" to_port="example set input"/>
    <connect from_op="Nominal to Binominal" from_port="example set output"
             to_op="Remap Binominals" to_port="example set input"/>
    <connect from_op="Remap Binominals" from_port="example set output"
             to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output"
             to_op="MetaCost" to_port="training set"/>
    <connect from_op="MetaCost" from_port="model"
             to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
  </process>
</operator>
</process>
Dortmund, Germany