The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
Regression via classification
Hi,
I wish to run a classification algorithm on a regression task.
Generated a new attribute where the label attribute is now discrete.
Unfortunately, I can no longer apply the Performance (Regression) operator in a straightforward way.
I have to compute the absolute error myself using generate attributes.
Since I'm applying attribute selection, I'm doing this over and over again, which is really slow.
Is there a faster way to achieve this result?
Best regards,
Wessel
Attached an example process below
I wish to run a classification algorithm on a regression task.
Generated a new attribute where the label attribute is now discrete.
Unfortunately, I can no longer apply the Performance (Regression) operator in a straightforward way.
I have to compute the absolute error myself using generate attributes.
Since I'm applying attribute selection, I'm doing this over and over again, which is really slow.
Is there a faster way to achieve this result?
Best regards,
Wessel
Attached an example process below
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process fragment.
  "Loop Subsets" iterates over attribute subsets of 1 to 2 attributes. Inside each
  iteration, X-Prediction (10 validations, stratified sampling) trains and applies a
  k-NN model (k = 3); the labelled data is scored by CP, rewritten by Generate
  Attributes, scored again by RP, and the results are logged.

  NOTE(review): the outer <process> contains the Loop Subsets operator directly; there
  is no enclosing operator of class "process" and nothing is connected to the Loop
  Subsets input in this fragment. Presumably this is a clipboard copy of a single
  operator; confirm before importing it as a complete process.
-->
<process version="7.0.001">
  <operator activated="true" class="loop_attribute_subsets" compatibility="7.0.001" expanded="true" height="68" name="Loop Subsets" width="90" x="179" y="34">
    <!-- Subset size bounds: at least 1 attribute, capped at 2. -->
    <parameter key="use_exact_number" value="false"/>
    <parameter key="exact_number_of_attributes" value="-1"/>
    <parameter key="min_number_of_attributes" value="1"/>
    <parameter key="limit_max_number" value="true"/>
    <parameter key="max_number_of_attributes" value="2"/>
    <process expanded="true">
      <operator activated="true" class="x_prediction" compatibility="7.0.001" expanded="true" height="68" name="X-Prediction" width="90" x="45" y="34">
        <parameter key="leave_one_out" value="false"/>
        <parameter key="number_of_validations" value="10"/>
        <parameter key="sampling_type" value="stratified sampling"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
        <!-- First sub-process: the training input feeds k-NN, whose model is passed on. -->
        <process expanded="true">
          <operator activated="true" class="k_nn" compatibility="7.0.001" expanded="true" height="82" name="k-NN" width="90" x="45" y="187">
            <parameter key="k" value="3"/>
            <parameter key="weighted_vote" value="false"/>
            <parameter key="measure_types" value="MixedMeasures"/>
            <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
            <parameter key="nominal_measure" value="NominalDistance"/>
            <parameter key="numerical_measure" value="EuclideanDistance"/>
            <parameter key="divergence" value="GeneralizedIDivergence"/>
            <parameter key="kernel_type" value="radial"/>
            <parameter key="kernel_gamma" value="1.0"/>
            <parameter key="kernel_sigma1" value="1.0"/>
            <parameter key="kernel_sigma2" value="0.0"/>
            <parameter key="kernel_sigma3" value="2.0"/>
            <parameter key="kernel_degree" value="3.0"/>
            <parameter key="kernel_shift" value="1.0"/>
            <parameter key="kernel_a" value="1.0"/>
            <parameter key="kernel_b" value="0.0"/>
          </operator>
          <connect from_port="training" to_op="k-NN" to_port="training set"/>
          <connect from_op="k-NN" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Second sub-process: Apply Model labels the unlabelled data with the model. -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="7.0.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="85">
            <list key="application_parameters"/>
            <parameter key="create_view" value="false"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="unlabelled data" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_port="labelled data"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_unlabelled data" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_labelled data" spacing="0"/>
        </process>
      </operator>
      <!-- CP: classification performance; accuracy is the only criterion switched on. -->
      <operator activated="true" class="performance_classification" compatibility="7.0.001" expanded="true" height="82" name="CP" width="90" x="179" y="34">
        <parameter key="main_criterion" value="first"/>
        <parameter key="accuracy" value="true"/>
        <parameter key="classification_error" value="false"/>
        <parameter key="kappa" value="false"/>
        <parameter key="weighted_mean_recall" value="false"/>
        <parameter key="weighted_mean_precision" value="false"/>
        <parameter key="spearman_rho" value="false"/>
        <parameter key="kendall_tau" value="false"/>
        <parameter key="absolute_error" value="false"/>
        <parameter key="relative_error" value="false"/>
        <parameter key="relative_error_lenient" value="false"/>
        <parameter key="relative_error_strict" value="false"/>
        <parameter key="normalized_absolute_error" value="false"/>
        <parameter key="root_mean_squared_error" value="false"/>
        <parameter key="root_relative_squared_error" value="false"/>
        <parameter key="squared_error" value="false"/>
        <parameter key="correlation" value="false"/>
        <parameter key="squared_correlation" value="false"/>
        <parameter key="cross-entropy" value="false"/>
        <parameter key="margin" value="false"/>
        <parameter key="soft_margin_loss" value="false"/>
        <parameter key="logistic_loss" value="false"/>
        <parameter key="skip_undefined_labels" value="true"/>
        <parameter key="use_example_weights" value="true"/>
        <list key="class_weights"/>
      </operator>
      <!--
        Redefines prediction(leadTime) as parse() of itself and leadTime as
        [leadTime_numeric], keeping all other attributes. Presumably this makes both
        columns numeric for RP; verify against the data.
      -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="45" y="187">
        <list key="function_descriptions">
          <parameter key="prediction(leadTime)" value="parse([prediction(leadTime)])"/>
          <parameter key="leadTime" value="[leadTime_numeric]"/>
        </list>
        <parameter key="keep_all" value="true"/>
      </operator>
      <!-- RP: regression performance; absolute_error is the only criterion switched on. -->
      <operator activated="true" class="performance_regression" compatibility="7.0.001" expanded="true" height="82" name="RP" width="90" x="179" y="187">
        <parameter key="main_criterion" value="first"/>
        <parameter key="root_mean_squared_error" value="false"/>
        <parameter key="absolute_error" value="true"/>
        <parameter key="relative_error" value="false"/>
        <parameter key="relative_error_lenient" value="false"/>
        <parameter key="relative_error_strict" value="false"/>
        <parameter key="normalized_absolute_error" value="false"/>
        <parameter key="root_relative_squared_error" value="false"/>
        <parameter key="squared_error" value="false"/>
        <parameter key="correlation" value="false"/>
        <parameter key="squared_correlation" value="false"/>
        <parameter key="prediction_average" value="false"/>
        <parameter key="spearman_rho" value="false"/>
        <parameter key="kendall_tau" value="false"/>
        <parameter key="skip_undefined_labels" value="true"/>
        <parameter key="use_example_weights" value="true"/>
      </operator>
      <!-- Log columns: RP performance, CP performance, Loop Subsets feature_names. -->
      <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="313" y="187">
        <list key="log">
          <parameter key="rp" value="operator.RP.value.performance"/>
          <parameter key="cp" value="operator.CP.value.performance"/>
          <parameter key="fn" value="operator.Loop Subsets.value.feature_names"/>
        </list>
        <parameter key="sorting_type" value="none"/>
        <parameter key="sorting_k" value="100"/>
        <parameter key="persistent" value="false"/>
      </operator>
      <!-- Wiring: example set, X-Prediction, CP, Generate Attributes, RP, Log (in that order). -->
      <connect from_port="example set" to_op="X-Prediction" to_port="example set"/>
      <connect from_op="X-Prediction" from_port="labelled data" to_op="CP" to_port="labelled data"/>
      <connect from_op="CP" from_port="example set" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="RP" to_port="labelled data"/>
      <connect from_op="RP" from_port="performance" to_op="Log" to_port="through 1"/>
      <portSpacing port="source_example set" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:
0
Answers
I can't get your process in - something's wrong with the XML.
Can't you simply use Parse Numbers on the prediction, swap the label and prediction roles on the right-hand side of the x-val, and use the standard performance operator?
~Martin
Dortmund, Germany