The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
Consensus Cluster / Cluster Ensemble: How to realize this
Hi guys,
In my master's thesis I have to implement a consensus clustering (cc), also called a cluster ensemble. My simple test example is to create a cc (k-medoids and k-means) on the Iris data set... Do not ask about the sense of this combination.
Unfortunately, my search did not turn up an answer to how I can create a cc in RapidMiner 5. Simple tinkering has not gotten me any further. Neither the Group Model operator nor the Model Combiner operator seems to be the right thing.
My problem is to combine the cluster outputs into one example set, so that I can feed it into a classification operator. This applies both to my attempt with the example sets themselves and to my attempt with the models from the cluster operators. Maybe I have an understanding problem? Maybe I need to make a classification for each cluster and combine these results?
My main strategy is the following:
Briefly, I considered to solve it with the R integration. But there must be a better way. I would be happy about any ideas, suggestions or comments. Thanks in advance
In my master's thesis I have to implement a consensus clustering (cc), also called a cluster ensemble. My simple test example is to create a cc (k-medoids and k-means) on the Iris data set... Do not ask about the sense of this combination.
Unfortunately, my search did not turn up an answer to how I can create a cc in RapidMiner 5. Simple tinkering has not gotten me any further. Neither the Group Model operator nor the Model Combiner operator seems to be the right thing.
My problem is to combine the cluster outputs into one example set, so that I can feed it into a classification operator. This applies both to my attempt with the example sets themselves and to my attempt with the models from the cluster operators. Maybe I have an understanding problem? Maybe I need to make a classification for each cluster and combine these results?
My main strategy is the following:
- Pre-processing
- Split dataset
- Use different cluster algorithms
- Create a cc
- Create a classification model based on the cc
- ...
Briefly, I considered to solve it with the R integration. But there must be a better way. I would be happy about any ideas, suggestions or comments. Thanks in advance
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 5.2 process: the Iris sample data is fanned out to a k-means and
  a k-medoids clustering operator (k = 2 each). An SVM operator is declared
  and its outputs are wired to the process results.

  NOTE(review): no <connect> element targets the SVM's input ports and the
  clustering outputs are not connected to anything, so as written the SVM
  receives no training data. The wiring is left exactly as in the original.
-->
<process version="5.2.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <parameter key="parallelize_main_process" value="false"/>
    <process expanded="true" height="431" width="748">

      <!-- Data source: the Iris entry of the Samples repository. -->
      <operator activated="true" class="retrieve" compatibility="5.2.008" expanded="true" height="60" name="Retrieve" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>

      <!-- Fans the retrieved data out to both clustering operators (see the connections below). -->
      <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="94" name="Multiply" width="90" x="179" y="30"/>

      <!-- k-medoids clustering, k = 2. Operator name corrected from the misspelled
           "k-medois"; the matching <connect> below uses the same corrected name. -->
      <operator activated="true" class="k_medoids" compatibility="5.2.008" expanded="true" height="76" name="k-medoids" width="90" x="313" y="120">
        <parameter key="add_cluster_attribute" value="true"/>
        <parameter key="add_as_label" value="false"/>
        <parameter key="remove_unlabeled" value="false"/>
        <parameter key="k" value="2"/>
        <parameter key="max_runs" value="10"/>
        <parameter key="max_optimization_steps" value="100"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
        <parameter key="measure_types" value="MixedMeasures"/>
        <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
        <parameter key="nominal_measure" value="NominalDistance"/>
        <parameter key="numerical_measure" value="EuclideanDistance"/>
        <parameter key="divergence" value="GeneralizedIDivergence"/>
        <parameter key="kernel_type" value="radial"/>
        <parameter key="kernel_gamma" value="1.0"/>
        <parameter key="kernel_sigma1" value="1.0"/>
        <parameter key="kernel_sigma2" value="0.0"/>
        <parameter key="kernel_sigma3" value="2.0"/>
        <parameter key="kernel_degree" value="3.0"/>
        <parameter key="kernel_shift" value="1.0"/>
        <parameter key="kernel_a" value="1.0"/>
        <parameter key="kernel_b" value="0.0"/>
      </operator>

      <!-- k-means clustering, k = 2, using the squared Euclidean Bregman divergence. -->
      <operator activated="true" class="k_means" compatibility="5.2.008" expanded="true" height="76" name="k-means" width="90" x="313" y="30">
        <parameter key="add_cluster_attribute" value="true"/>
        <parameter key="add_as_label" value="false"/>
        <parameter key="remove_unlabeled" value="false"/>
        <parameter key="k" value="2"/>
        <parameter key="max_runs" value="10"/>
        <parameter key="determine_good_start_values" value="false"/>
        <parameter key="measure_types" value="BregmanDivergences"/>
        <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
        <parameter key="nominal_measure" value="NominalDistance"/>
        <parameter key="numerical_measure" value="EuclideanDistance"/>
        <parameter key="divergence" value="SquaredEuclideanDistance"/>
        <parameter key="kernel_type" value="radial"/>
        <parameter key="kernel_gamma" value="1.0"/>
        <parameter key="kernel_sigma1" value="1.0"/>
        <parameter key="kernel_sigma2" value="0.0"/>
        <parameter key="kernel_sigma3" value="2.0"/>
        <parameter key="kernel_degree" value="3.0"/>
        <parameter key="kernel_shift" value="1.0"/>
        <parameter key="kernel_a" value="1.0"/>
        <parameter key="kernel_b" value="0.0"/>
        <parameter key="max_optimization_steps" value="100"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>

      <!-- SVM with a dot kernel. NOTE(review): nothing is connected to this
           operator's inputs; only its "model" and "weights" outputs are wired. -->
      <operator activated="true" class="support_vector_machine" compatibility="5.2.008" expanded="true" height="112" name="SVM" width="90" x="514" y="30">
        <parameter key="kernel_type" value="dot"/>
        <parameter key="kernel_gamma" value="1.0"/>
        <parameter key="kernel_sigma1" value="1.0"/>
        <parameter key="kernel_sigma2" value="0.0"/>
        <parameter key="kernel_sigma3" value="2.0"/>
        <parameter key="kernel_shift" value="1.0"/>
        <parameter key="kernel_degree" value="2.0"/>
        <parameter key="kernel_a" value="1.0"/>
        <parameter key="kernel_b" value="0.0"/>
        <parameter key="kernel_cache" value="200"/>
        <parameter key="C" value="0.0"/>
        <parameter key="convergence_epsilon" value="0.0010"/>
        <parameter key="max_iterations" value="100000"/>
        <parameter key="scale" value="true"/>
        <parameter key="calculate_weights" value="true"/>
        <parameter key="return_optimization_performance" value="true"/>
        <parameter key="L_pos" value="1.0"/>
        <parameter key="L_neg" value="1.0"/>
        <parameter key="epsilon" value="0.0"/>
        <parameter key="epsilon_plus" value="0.0"/>
        <parameter key="epsilon_minus" value="0.0"/>
        <parameter key="balance_cost" value="false"/>
        <parameter key="quadratic_loss_pos" value="false"/>
        <parameter key="quadratic_loss_neg" value="false"/>
        <parameter key="estimate_performance" value="false"/>
      </operator>

      <!-- Wiring: Retrieve to Multiply, then one Multiply output per clustering operator. -->
      <connect from_op="Retrieve" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="k-means" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="k-medoids" to_port="example set"/>
      <!-- Process results are taken from the SVM's model and weights outputs. -->
      <connect from_op="SVM" from_port="model" to_port="result 1"/>
      <connect from_op="SVM" from_port="weights" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
0
Answers
You could use the rename and set role operators followed by the join operator to make a combined example set with the result of the various clusterings. From there use a suitable classifier - I used Naive Bayes because SVM doesn't like polynominal attributes.
Here's an example... regards
Andrew