The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
"apply weka:W-HierarchicalClusterer"
Hello friends of the community. a query
I'm working with text mining - clustering
I performed the pre-processing text files, create the TF-IDF Vertor, filter the STOP-WORDS and the next step I need to apply the algorithm "Weka: W-HierarchicalClusterer" but I get the following error:
Jan 28, 2013 6:01:32 PM SEVERE: Process failed: W-HierarchicalClusterer caused an error: com.rapidminer.operator.UserError: caused an error: java.lang.ArrayIndexOutOfBoundsException: 10
Jan 28, 2013 6:01:32 PM SEVERE: Here: Process[1] (Process)
subprocess 'Main Process'
+- Process Documents from Files[1] (Process Documents from Files)
subprocess 'Vector Creation'
| +- Transform Cases[6] (Transform Cases)
| +- Tokenize[6] (Tokenize)
| +- Filter stopwords_pronombres_preposiciones[6] (Filter Stopwords (Dictionary))
| +- Filter stopwords_caratula[6] (Filter Stopwords (Dictionary))
| +- Filter Stopwords (English)[6] (Filter Stopwords (English))
| +- Filter Tokens (by Length)[6] (Filter Tokens (by Length))
==> +- W-HierarchicalClusterer[1] (W-HierarchicalClusterer)
Add my process XML down
I'm working with text mining - clustering
I performed the pre-processing text files, create the TF-IDF Vertor, filter the STOP-WORDS and the next step I need to apply the algorithm "Weka: W-HierarchicalClusterer" but I get the following error:
Jan 28, 2013 6:01:32 PM SEVERE: Process failed: W-HierarchicalClusterer caused an error: com.rapidminer.operator.UserError: caused an error: java.lang.ArrayIndexOutOfBoundsException: 10
Jan 28, 2013 6:01:32 PM SEVERE: Here: Process[1] (Process)
subprocess 'Main Process'
+- Process Documents from Files[1] (Process Documents from Files)
subprocess 'Vector Creation'
| +- Transform Cases[6] (Transform Cases)
| +- Tokenize[6] (Tokenize)
| +- Filter stopwords_pronombres_preposiciones[6] (Filter Stopwords (Dictionary))
| +- Filter stopwords_caratula[6] (Filter Stopwords (Dictionary))
| +- Filter Stopwords (English)[6] (Filter Stopwords (English))
| +- Filter Tokens (by Length)[6] (Filter Tokens (by Length))
==> +- W-HierarchicalClusterer[1] (W-HierarchicalClusterer)
Add my process XML down
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.008">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
<process expanded="true" height="386" width="547">
<operator activated="true" class="text:process_document_from_file" compatibility="5.2.004" expanded="true" height="76" name="Process Documents from Files" width="90" x="112" y="75">
<list key="text_directories">
<parameter key="doc1" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc1"/>
<parameter key="doc2" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc2"/>
<parameter key="doc3" value="C:\Users\marcos\Desktop\Datos de prueba para clustering\Caso de prueba 2\En español\doc3"/>
</list>
<process expanded="true" height="415" width="758">
<operator activated="true" class="text:transform_cases" compatibility="5.2.004" expanded="true" height="60" name="Transform Cases" width="90" x="45" y="30"/>
<operator activated="true" class="text:tokenize" compatibility="5.2.004" expanded="true" height="60" name="Tokenize" width="90" x="45" y="120"/>
<operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.2.004" expanded="true" height="76" name="Filter stopwords_pronombres_preposiciones" width="90" x="45" y="210">
<parameter key="file" value="C:\Users\marcos\Desktop\stopwords\stopwords_pronombres_preposiciones.txt"/>
</operator>
<operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.2.004" expanded="true" height="76" name="Filter stopwords_caratula" width="90" x="45" y="300">
<parameter key="file" value="C:\Users\marcos\Desktop\stopwords\stopwords_caratula.txt"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="5.2.004" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="179" y="30"/>
<operator activated="true" class="text:filter_by_length" compatibility="5.2.004" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="179" y="120">
<parameter key="min_chars" value="3"/>
</operator>
<connect from_port="document" to_op="Transform Cases" to_port="document"/>
<connect from_op="Transform Cases" from_port="document" to_op="Tokenize" to_port="document"/>
<connect from_op="Tokenize" from_port="document" to_op="Filter stopwords_pronombres_preposiciones" to_port="document"/>
<connect from_op="Filter stopwords_pronombres_preposiciones" from_port="document" to_op="Filter stopwords_caratula" to_port="document"/>
<connect from_op="Filter stopwords_caratula" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
<connect from_op="Filter Stopwords (English)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
<connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="weka:W-HierarchicalClusterer" compatibility="5.1.001" expanded="true" height="76" name="W-HierarchicalClusterer" width="90" x="309" y="173"/>
<connect from_op="Process Documents from Files" from_port="example set" to_op="W-HierarchicalClusterer" to_port="example set"/>
<connect from_op="W-HierarchicalClusterer" from_port="cluster model" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Tagged:
0
Answers
If that still does not help, you could try one of RapidMiner's built-in operators for hierarchical clustering. You'll find all clustering algorithms in the group Modelling / Clustering and Segmentation.
Best regards,
Marius