The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
[SOLVED] Replace missing value with subgroup average
Hi to all,
I'm a new RapidMiner user.
I have started using some of the operators and immediately got stuck on a problem.
I want to replace some missing values with the average of the same attribute, grouped by another attribute. For example, I have
hour - value 1 - value 2
1 - 10 - 20
2 - 15 - 25
3 - 32 - 8
1 - 12 - 18
2 - 10 - 29
3 - 27 - 11
1 - 5 - 24
2 - 14 - 20
3 - 10 - 3
1 - ? - ?
should become
1 - (10+12+5)/3 - (20+18+24)/3
Is there a way to do this?
thanks all
I'm a new RapidMiner user.
I have started using some of the operators and immediately got stuck on a problem.
I want to replace some missing values with the average of the same attribute, grouped by another attribute. For example, I have
hour - value 1 - value 2
1 - 10 - 20
2 - 15 - 25
3 - 32 - 8
1 - 12 - 18
2 - 10 - 29
3 - 27 - 11
1 - 5 - 24
2 - 14 - 20
3 - 10 - 3
1 - ? - ?
should become
1 - (10+12+5)/3 - (20+18+24)/3
Is there a way to do this?
thanks all
0
Answers
You can do this with a Loop Values operator, doing the replacement separately for each group. An example process is attached.
Cheers,
Martin
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Demo process: builds a small example set that contains a missing value,
  then loops over the values of att1 and replaces missing att2 values
  separately within each att1 group. The per-group results are appended
  back into a single example set.
-->
<process version="6.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.4.000" expanded="true" name="Process">
    <process expanded="true">

      <!--
        Step 1: demo data. Generates 5 examples with 3 attributes, discretizes
        att1 into bins, overwrites att2 at example_index 2 with -1, and then
        declares the numeric value -1.0 as missing. A breakpoint is set after
        this subprocess.
      -->
      <operator activated="true" breakpoints="after" class="subprocess" compatibility="6.4.000" expanded="true" height="76" name="Generate Data (2)" width="90" x="45" y="75">
        <process expanded="true">
          <operator activated="true" class="generate_data" compatibility="6.4.000" expanded="true" height="60" name="Generate Data" width="90" x="179" y="75">
            <parameter key="number_examples" value="5"/>
            <parameter key="number_of_attributes" value="3"/>
          </operator>
          <operator activated="true" class="discretize_by_bins" compatibility="6.4.000" expanded="true" height="94" name="Discretize" width="90" x="313" y="75">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="att1"/>
            <parameter key="range_name_type" value="short"/>
          </operator>
          <operator activated="true" class="set_data" compatibility="6.4.000" expanded="true" height="76" name="Set Data" width="90" x="447" y="75">
            <parameter key="example_index" value="2"/>
            <parameter key="attribute_name" value="att2"/>
            <parameter key="value" value="-1"/>
            <list key="additional_values"/>
          </operator>
          <operator activated="true" class="declare_missing_value" compatibility="6.4.000" expanded="true" height="76" name="Declare Missing Value" width="90" x="581" y="75">
            <parameter key="numeric_value" value="-1.0"/>
          </operator>
          <connect from_op="Generate Data" from_port="output" to_op="Discretize" to_port="example set input"/>
          <connect from_op="Discretize" from_port="example set output" to_op="Set Data" to_port="example set input"/>
          <connect from_op="Set Data" from_port="example set output" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Just generate an example set for demo purposes</description>
      </operator>

      <!--
        Step 2: per-group replacement. Loops over the values of att1; in each
        iteration the example set is filtered to att1 equal to %{loop_value}
        and missing values of att2 are replaced within that subset.
        NOTE(review): the replacement function is not set explicitly on
        "Replace Missing Values"; its description says "Replace by Average".
      -->
      <operator activated="true" class="loop_values" compatibility="6.4.000" expanded="true" height="94" name="Loop Values" width="90" x="313" y="75">
        <parameter key="attribute" value="att1"/>
        <process expanded="true">
          <operator activated="true" class="multiply" compatibility="6.4.000" expanded="true" height="94" name="Multiply" width="90" x="45" y="30"/>
          <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="94" name="Filter Examples" width="90" x="246" y="165">
            <list key="filters_list">
              <parameter key="filters_entry_key" value="att1.equals.%{loop_value}"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">Filter for each range</description>
          </operator>
          <operator activated="true" class="replace_missing_values" compatibility="6.4.000" expanded="true" height="94" name="Replace Missing Values" width="90" x="380" y="165">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="att2"/>
            <list key="columns"/>
            <description align="center" color="transparent" colored="false" width="126">Replace by Average</description>
          </operator>
          <connect from_port="example set" to_op="Multiply" to_port="input"/>
          <!-- out 1 receives the unfiltered copy; the enclosing process only reads out 2. -->
          <connect from_op="Multiply" from_port="output 1" to_port="out 1"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
          <connect from_op="Replace Missing Values" from_port="example set output" to_port="out 2"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
          <portSpacing port="sink_out 3" spacing="0"/>
        </process>
      </operator>

      <!-- Step 3: append the per-group outputs of the loop and deliver the merged set as result 1. -->
      <operator activated="true" class="append" compatibility="6.4.000" expanded="true" height="76" name="Append" width="90" x="447" y="75"/>

      <connect from_op="Generate Data (2)" from_port="out 1" to_op="Loop Values" to_port="example set"/>
      <connect from_op="Loop Values" from_port="out 2" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Dortmund, Germany