The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
"[SOLVED] Silhouette index and k-means"
Hello friends of the community, I have a question.
I want to validate some clusters produced by k-means using the "Silhouette" metric. For this I downloaded the .jar from this page:
Then I copied the .jar to the folder lib/plugins.
Then I connected the output of the "k-means clustering" operator to the "Silhouette" operator, but it seems I am missing something, because I get the following error:
"Process failed: No data was delivered at port Performance.similarity (disconnected)."
The process XML is included below.
thanks
regards
I want to validate some clusters produced by k-means using the "Silhouette" metric. For this I downloaded the .jar from this page:
Then I copied the .jar to the folder lib/plugins.
Then I connected the output of the "k-means clustering" operator to the "Silhouette" operator, but it seems I am missing something, because I get the following error:
"Process failed: No data was delivered at port Performance.similarity (disconnected)."
The process XML is included below.
could someone tell me what is wrong?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process posted with the question: builds an example set from three
     document directories, clusters it with k-means (k = 3) and passes the result
     to a cluster silhouette performance operator from a third-party plugin. -->
<process version="5.2.008">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
<process expanded="true" height="386" width="681">
<!-- Step 1: read documents from three local directories (doc1, doc2, doc3). -->
<operator activated="true" class="text:process_document_from_file" compatibility="5.2.004" expanded="true" height="76" name="Process Documents from Files" width="90" x="112" y="75">
<list key="text_directories">
<parameter key="doc1" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc1"/>
<parameter key="doc2" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc2"/>
<parameter key="doc3" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc3"/>
</list>
<!-- Per-document pipeline, chained in this order: case transformation, tokenization,
     two dictionary-based stopword filters (files on the local desktop), the English
     stopword filter, and a token length filter with min_chars = 3. -->
<process expanded="true" height="415" width="758">
<operator activated="true" class="text:transform_cases" compatibility="5.2.004" expanded="true" height="60" name="Transform Cases" width="90" x="45" y="30"/>
<operator activated="true" class="text:tokenize" compatibility="5.2.004" expanded="true" height="60" name="Tokenize" width="90" x="45" y="120"/>
<operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.2.004" expanded="true" height="76" name="Filter stopwords_pronombres_preposiciones" width="90" x="45" y="210">
<parameter key="file" value="C:\Users\marcos\Desktop\stopwords\stopwords_pronombres_preposiciones.txt"/>
</operator>
<operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.2.004" expanded="true" height="76" name="Filter stopwords_caratula" width="90" x="45" y="300">
<parameter key="file" value="C:\Users\marcos\Desktop\stopwords\stopwords_caratula.txt"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="5.2.004" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="179" y="30"/>
<operator activated="true" class="text:filter_by_length" compatibility="5.2.004" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="179" y="120">
<parameter key="min_chars" value="3"/>
</operator>
<connect from_port="document" to_op="Transform Cases" to_port="document"/>
<connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
<connect from_op="Tokenize" from_port="document" to_op="Filter stopwords_pronombres_preposiciones" to_port="document"/>
<connect from_op="Filter stopwords_pronombres_preposiciones" from_port="document" to_op="Filter stopwords_caratula" to_port="document"/>
<connect from_op="Filter stopwords_caratula" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
<connect from_op="Filter Stopwords (English)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
<connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<!-- Step 2: k-means with k = 3, numerical measure CosineSimilarity, add_as_label = true. -->
<operator activated="true" class="k_means" compatibility="5.2.008" expanded="true" height="76" name="Clustering" width="90" x="320" y="132">
<parameter key="add_as_label" value="true"/>
<parameter key="k" value="3"/>
<parameter key="measure_types" value="NumericalMeasures"/>
<parameter key="numerical_measure" value="CosineSimilarity"/>
</operator>
<!-- Step 3: silhouette evaluation (operator named "Performance", plugin compatibility 0.2.000).
     NOTE(review): the connections below feed only the "cluster model" and "example set"
     inputs of Performance; nothing is connected to a "similarity" port. This matches the
     reported error "No data was delivered at port Performance.similarity (disconnected)".
     Presumably the operator also needs a similarity input (for example from a
     Data to Similarity operator); confirm against the plugin documentation. -->
<operator activated="true" class="rmx_cpplugin:cluster_silhouette" compatibility="0.2.000" expanded="true" height="112" name="Performance" width="90" x="541" y="206"/>
<connect from_op="Process Documents from Files" from_port="example set" to_op="Clustering" to_port="example set"/>
<connect from_op="Clustering" from_port="cluster model" to_op="Performance" to_port="cluster model"/>
<connect from_op="Clustering" from_port="clustered set" to_op="Performance" to_port="example set"/>
<connect from_op="Performance" from_port="example set" to_port="result 1"/>
<connect from_op="Performance" from_port="performance vector" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>
thanks
regards
Tagged:
0
Answers
regards
I have no idea what extension you were using, because your original post is missing any links. However, the error usually indicates that an input port of an operator is not connected (and therefore cannot receive data).
Regards,
Marco
The plugin is called "CPPlugin-0.2"
I added this plugin to \Rapid-I\RapidMiner5\lib, and it gave me the Silhouette index operator.
I tested your process with the plugin version 0.3 (not the 0.2 you were using) and RapidMiner 5.3.000 and I used some data from the Samples repository instead of your Process Documents operator - and it worked just fine. Maybe try the latest version of the plugin and make sure your Process Documents operator returns something useful (you can use a breakpoint on it to see its output).
Regards,
Marco
regards
Please note that this is a third-party extension. I just googled the name of the extension you wrote in your previous post and the first link was a hit.
Regards,
Marco