The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
logistic regression operator and weights
Telcontar120
RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,635 Unicorn
The operator information for the base logistic regression learner indicates it does not accept weighted examples (see screenshot). However, if you actually test this by running a model on weighted vs unweighted examples, it is very clear that the resulting model is different, so it does appear that weighting is affecting this operator. See the example process here:
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.001">
  <!--
    Compares the H2O Logistic Regression learner on the same example set fed
    through two branches of a Multiply operator:
      * "Validation": cross-validation on the examples as retrieved.
      * "Validation (2)": the same cross-validation, but the examples first
        pass through Generate Weight (Stratification).
    Results of the first branch are delivered to result ports 1 to 3, results
    of the second branch to result ports 4 to 6.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Data preparation: load the sample data, mark "Default" as the label,
           and keep only examples whose label is not missing. -->
      <operator activated="true" class="retrieve" compatibility="8.1.001" expanded="true" height="68" name="Retrieve Counterparty Risk Data" width="90" x="112" y="34">
        <parameter key="repository_entry" value="//Samples/Templates/Credit Risk Modeling/Counterparty Risk Data"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="8.1.001" expanded="true" height="82" name="Set Role" width="90" x="112" y="136">
        <parameter key="attribute_name" value="Default"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="8.1.001" expanded="true" height="103" name="Filter Examples" width="90" x="112" y="238">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="Default.is_not_missing."/>
        </list>
      </operator>
      <!-- Fan the prepared example set out to both validation branches. -->
      <operator activated="true" class="multiply" compatibility="8.1.001" expanded="true" height="103" name="Multiply" width="90" x="246" y="34"/>
      <!-- Branch 1: cross-validation without the weight generation step. -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.001" expanded="true" height="145" name="Validation" width="90" x="447" y="34">
        <parameter key="sampling_type" value="stratified sampling"/>
        <!-- Training subprocess -->
        <process expanded="true">
          <operator activated="true" class="h2o:logistic_regression" compatibility="7.6.001" expanded="true" height="124" name="Logistic Regression" width="90" x="175" y="34"/>
          <connect from_port="training set" to_op="Logistic Regression" to_port="training set"/>
          <connect from_op="Logistic Regression" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="green" colored="true" height="80" resized="true" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (90 % of data by default, 10 times)</description>
        </process>
        <!-- Testing subprocess -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="8.1.001" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" compatibility="8.1.001" expanded="true" height="82" name="Performance" width="90" x="179" y="34"/>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="left" color="blue" colored="true" height="103" resized="true" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (10 %).<br/>The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating a logistic regression model.</description>
      </operator>
      <!-- Branch 2: the examples pass through Generate Weight (Stratification)
           before the same cross-validation setup.
           NOTE(review): presumably this adds a weight attribute to the
           examples; confirm against the operator documentation. -->
      <operator activated="true" class="generate_weight_stratification" compatibility="8.1.001" expanded="true" height="82" name="Generate Weight (Stratification)" width="90" x="313" y="238">
        <parameter key="total_weight" value="424.0"/>
      </operator>
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.001" expanded="true" height="145" name="Validation (2)" width="90" x="447" y="238">
        <parameter key="sampling_type" value="stratified sampling"/>
        <!-- Training subprocess -->
        <process expanded="true">
          <operator activated="true" class="h2o:logistic_regression" compatibility="7.6.001" expanded="true" height="124" name="Logistic Regression (2)" width="90" x="175" y="34"/>
          <connect from_port="training set" to_op="Logistic Regression (2)" to_port="training set"/>
          <connect from_op="Logistic Regression (2)" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="green" colored="true" height="80" resized="false" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (90 % of data by default, 10 times)</description>
        </process>
        <!-- Testing subprocess -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="8.1.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" compatibility="8.1.001" expanded="true" height="82" name="Performance (2)" width="90" x="179" y="34"/>
          <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
          <connect from_op="Performance (2)" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance (2)" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="left" color="blue" colored="true" height="103" resized="false" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (10 %).<br/>The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating a logistic regression model.</description>
      </operator>
      <!-- Top-level wiring: preparation chain, fan-out, then results. -->
      <connect from_op="Retrieve Counterparty Risk Data" from_port="output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Validation" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Generate Weight (Stratification)" to_port="example set input"/>
      <connect from_op="Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Validation" from_port="test result set" to_port="result 2"/>
      <connect from_op="Validation" from_port="performance 1" to_port="result 3"/>
      <connect from_op="Generate Weight (Stratification)" from_port="example set output" to_op="Validation (2)" to_port="example set"/>
      <connect from_op="Validation (2)" from_port="model" to_port="result 4"/>
      <connect from_op="Validation (2)" from_port="test result set" to_port="result 5"/>
      <connect from_op="Validation (2)" from_port="performance 1" to_port="result 6"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
      <portSpacing port="sink_result 7" spacing="0"/>
    </process>
  </operator>
</process>
Can the operator information be updated or clarified? Thanks.
Tagged:
1
Comments
Got it @Telcontar120. Pushing to dev team. Stay tuned.
Confirmed as issue. Ticket created.
- actually make it ignore the weights (as it says it does, but currently doesn't)
- or, actually use the weights, and update the operator capabilities description accordingly (note: this is clearly the better option!)
But the way this is framed currently, I am not sure what we would be getting if we voted for this issue. Can you clarify?
Lindon Ventures
Data Science Consulting from Certified RapidMiner Experts