The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is in read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
apply multi label modeling - How to keep original attributes in a data set
Hello,
Below is a model I built with training data (1) using the Multi Label Modeling operator. That model is applied to another data set (2). The number of attributes and the attribute names are the same in both data sets; only the values of the attributes differ. The goal is to filter out wrong predictions and to filter other examples based on customized filtering. For the latter, the original attributes of data set (2) are required.
However, in the result example set of (2) the original attributes disappear and only the predictions are shown. If I set the original attributes as label (or any other role), only "one" original attribute is shown in the example set of (2). And by doing this, the Multi Label Performance operator does not work, since no labels are allowed anyway.
How can I keep the original attributes of data set (2)?
The Filter Examples operator within the Multi Label Performance operator also does not work if I use data set (2) to apply the trained model on (see below). There's an error message which says "attribute xx does not exist". But if the model is trained on data set (1) and also applied to the same data set (1), wrong predictions can be filtered.
How can I filter wrong predictions of the data set (2)?
Thank you for the help!
0
Best Answer
-
varunm1 Member Posts: 1,207 Unicorn
Hello @LeMarc
This seems a bit weird; it may be an issue in how Multi Label Modeling integrates with Apply Model, or it may be intended behavior ( @tftemme and @mschmitz ).
To resolve this, I generated an ID for the dataset and used an inner join based on the ID column to get the required columns (Survived & Port Embarkation). Then I generated attributes named (Survived_Flag, Port_Embarkation_Flag) by comparing the predictions and original values. Each flag will be 1 if the prediction is correct and 0 if the prediction is wrong. The attached .rmp file can be imported to RM using FILE --> Import Process. You can then filter examples based on these two flag attributes.
Do let us know if this works.
Regards,
Varun
https://www.varunmandalapu.com/
Be Safe. Follow precautions and Maintain Social Distancing
6
Answers
<!-- Process context: the input, output and macros sections are all empty. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<!--
  Top-level process operator.
  Flow (see the connect elements at the end of the inner process):
    Retrieve Titanic, then Filter Examples, then Set Role, then Multi Label Modeling (training)
    Retrieve Titanic (2) feeds Apply Model directly as the unlabelled data
    Apply Model's labelled data is delivered to result port 1.
-->
<operator activated="true" class="process" compatibility="9.6.000" expanded="true" name="Process">
  <parameter key="logverbosity" value="init"/>
  <parameter key="random_seed" value="2001"/>
  <parameter key="send_mail" value="never"/>
  <parameter key="notification_email" value=""/>
  <parameter key="process_duration_for_mail" value="30"/>
  <parameter key="encoding" value="SYSTEM"/>
  <process expanded="true">

    <!-- Training branch: load the Titanic sample data from the repository. -->
    <operator activated="true" class="retrieve" compatibility="9.6.000" expanded="true" height="68" name="Retrieve Titanic" width="90" x="45" y="34">
      <parameter key="repository_entry" value="//Samples/data/Titanic"/>
    </operator>

    <!-- Filter the training data with the "no_missing_attributes" condition class (filter not inverted). -->
    <operator activated="true" class="filter_examples" compatibility="9.6.000" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="34">
      <parameter key="parameter_expression" value=""/>
      <parameter key="condition_class" value="no_missing_attributes"/>
      <parameter key="invert_filter" value="false"/>
      <list key="filters_list"/>
      <parameter key="filters_logic_and" value="true"/>
      <parameter key="filters_check_metadata" value="true"/>
    </operator>

    <!-- Give the two target attributes custom roles: Survived gets "survived", Port of Embarkation gets "POE". -->
    <operator activated="true" class="set_role" compatibility="9.6.000" expanded="true" height="82" name="Set Role" width="90" x="313" y="34">
      <parameter key="attribute_name" value="Survived"/>
      <parameter key="target_role" value="survived"/>
      <list key="set_additional_roles">
        <parameter key="Port of Embarkation" value="POE"/>
      </list>
    </operator>

    <!--
      Multi Label Modeling over the attribute subset "Survived|Port of Embarkation".
      include_special_attributes is true, so the attributes that were given custom
      roles by Set Role remain selectable here.
    -->
    <operator activated="true" class="time_series:multi_label_model_learner" compatibility="9.6.000" expanded="true" height="82" name="Multi Label Modeling" width="90" x="447" y="34">
      <parameter key="attribute_filter_type" value="subset"/>
      <parameter key="attribute" value=""/>
      <parameter key="attributes" value="Survived|Port of Embarkation"/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="attribute_value"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="time"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="true"/>
      <parameter key="add_macros" value="false"/>
      <parameter key="current_label_name_macro" value="current_label_attribute"/>
      <parameter key="current_label_type_macro" value="current_label_type"/>
      <parameter key="enable_parallel_execution" value="true"/>
      <process expanded="true">

        <!-- Inner learner: a Random Forest with 100 trees, trained on the sub-process training set. -->
        <operator activated="true" class="concurrency:parallel_random_forest" compatibility="9.6.000" expanded="true" height="103" name="Random Forest" width="90" x="380" y="34">
          <parameter key="number_of_trees" value="100"/>
          <parameter key="criterion" value="gain_ratio"/>
          <parameter key="maximal_depth" value="10"/>
          <parameter key="apply_pruning" value="false"/>
          <parameter key="confidence" value="0.1"/>
          <parameter key="apply_prepruning" value="false"/>
          <parameter key="minimal_gain" value="0.01"/>
          <parameter key="minimal_leaf_size" value="2"/>
          <parameter key="minimal_size_for_split" value="4"/>
          <parameter key="number_of_prepruning_alternatives" value="3"/>
          <parameter key="random_splits" value="false"/>
          <parameter key="guess_subset_ratio" value="true"/>
          <parameter key="subset_ratio" value="0.2"/>
          <parameter key="voting_strategy" value="confidence vote"/>
          <parameter key="use_local_random_seed" value="false"/>
          <parameter key="local_random_seed" value="1992"/>
          <parameter key="enable_parallel_execution" value="true"/>
        </operator>

        <!-- Inner wiring: training set into Random Forest, its model out of the sub-process. -->
        <connect from_port="training set" to_op="Random Forest" to_port="training set"/>
        <connect from_op="Random Forest" from_port="model" to_port="model"/>
        <portSpacing port="source_training set" spacing="0"/>
        <portSpacing port="source_input 1" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
        <portSpacing port="sink_output 1" spacing="0"/>
      </process>
    </operator>

    <!-- Scoring branch: the same repository entry is loaded again and passed to Apply Model without Filter Examples or Set Role. -->
    <operator activated="true" class="retrieve" compatibility="9.6.000" expanded="true" height="68" name="Retrieve Titanic (2)" width="90" x="179" y="187">
      <parameter key="repository_entry" value="//Samples/data/Titanic"/>
    </operator>

    <!-- Apply the trained multi-label model to the second data set. -->
    <operator activated="true" class="apply_model" compatibility="9.6.000" expanded="true" height="82" name="Apply Model" width="90" x="380" y="187">
      <list key="application_parameters"/>
      <parameter key="create_view" value="false"/>
    </operator>

    <!-- Outer wiring between the operators above. -->
    <connect from_op="Retrieve Titanic" from_port="output" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="Multi Label Modeling" to_port="training set"/>
    <connect from_op="Multi Label Modeling" from_port="model" to_op="Apply Model" to_port="model"/>
    <connect from_op="Retrieve Titanic (2)" from_port="output" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
  </process>
</operator>
</process>