The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is in read-only mode from October 28 to November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here.
LDAEvaluationWordNet - wrong connection
Dear all
I am new to the community and also new to this field, so I apologize in advance for the dumb questions.
I am trying to run LDAEvaluationWordNet on my text mining data. However, it always shows the message "Your connection is producing the wrong type of data..." (see the XML code below).
Can someone help me with that? Thank you!
I am new to the community and also new to this field, so I apologize in advance for the dumb questions.
I am trying to run LDAEvaluationWordNet on my text mining data. However, it always shows the message "Your connection is producing the wrong type of data..." (see the XML code below).
Can someone help me with that? Thank you!
<?xml version="1.0" encoding="UTF-8"?><process version="9.5.001">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<!-- Loop Files (2): runs the inner process once for every file in the directory
     whose name matches the regex filter. Each iteration reads the file as a
     document, builds a TF-IDF word vector, and delivers the resulting word
     list as an ExampleSet on "output 1". -->
<operator activated="true" class="concurrency:loop_files" compatibility="9.5.001" expanded="true" height="82" name="Loop Files (2)" width="90" x="112" y="34">
  <parameter key="directory" value="C:\Users\Kevin\Desktop\REF PDF"/>
  <parameter key="filter_type" value="regex"/>
  <!-- The dot before "pdf" is escaped: an unescaped dot matches any character,
       so names such as "notes_pdf" would also pass the filter. -->
  <parameter key="filter_by_regex" value=".*\.pdf"/>
  <parameter key="recursive" value="false"/>
  <parameter key="enable_macros" value="true"/>
  <parameter key="macro_for_file_name" value="file_name"/>
  <parameter key="macro_for_file_type" value="file_type"/>
  <parameter key="macro_for_folder_name" value="folder_name"/>
  <parameter key="reuse_results" value="false"/>
  <parameter key="enable_parallel_execution" value="true"/>
  <process expanded="true">
    <!-- Step 1: read the current file object as a text document. -->
    <operator activated="true" class="text:read_document" compatibility="8.2.000" expanded="true" height="68" name="Read Document" width="90" x="45" y="34">
      <parameter key="extract_text_only" value="true"/>
      <parameter key="use_file_extension_as_type" value="true"/>
      <parameter key="content_type" value="pdf"/>
      <parameter key="encoding" value="SYSTEM"/>
    </operator>
    <!-- Step 2: turn the document into a TF-IDF word vector; pruning is off
         (prune_method is "none"). -->
    <operator activated="true" class="text:process_documents" compatibility="8.2.000" expanded="true" height="103" name="Process Documents" width="90" x="246" y="34">
      <parameter key="create_word_vector" value="true"/>
      <parameter key="vector_creation" value="TF-IDF"/>
      <parameter key="add_meta_information" value="true"/>
      <parameter key="keep_text" value="false"/>
      <parameter key="prune_method" value="none"/>
      <parameter key="prune_below_percent" value="3.0"/>
      <parameter key="prune_above_percent" value="30.0"/>
      <parameter key="prune_below_absolute" value="2"/>
      <parameter key="prune_above_absolute" value="9999"/>
      <parameter key="prune_below_rank" value="5.0"/>
      <parameter key="prune_above_rank" value="5.0"/>
      <parameter key="datamanagement" value="double_sparse_array"/>
      <parameter key="data_management" value="auto"/>
      <process expanded="true">
        <!-- Token pipeline: Tokenize, then Filter Stopwords, then Stem.
             Stopwords are removed BEFORE stemming because Porter stemming
             rewrites tokens (for example "this" becomes "thi"), and the
             stemmed forms would presumably no longer match the stopword list. -->
        <operator activated="true" class="text:tokenize" compatibility="8.2.000" expanded="true" height="68" name="Tokenize" width="90" x="179" y="136">
          <parameter key="mode" value="non letters"/>
          <parameter key="characters" value=".:"/>
          <parameter key="language" value="English"/>
          <parameter key="max_token_length" value="3"/>
        </operator>
        <operator activated="true" class="text:filter_stopwords_english" compatibility="8.2.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="447" y="136"/>
        <operator activated="true" class="text:stem_porter" compatibility="8.2.000" expanded="true" height="68" name="Stem (Porter)" width="90" x="648" y="136"/>
        <connect from_port="document" to_op="Tokenize" to_port="document"/>
        <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
        <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
        <connect from_op="Stem (Porter)" from_port="document" to_port="document 1"/>
        <portSpacing port="source_document" spacing="0"/>
        <portSpacing port="sink_document 1" spacing="0"/>
        <portSpacing port="sink_document 2" spacing="0"/>
      </process>
    </operator>
    <!-- Step 3: convert the word list of this file into an ExampleSet. -->
    <operator activated="true" class="text:wordlist_to_data" compatibility="8.2.000" expanded="true" height="82" name="WordList to Data" width="90" x="447" y="34"/>
    <connect from_port="file object" to_op="Read Document" to_port="file"/>
    <connect from_op="Read Document" from_port="output" to_op="Process Documents" to_port="documents 1"/>
    <connect from_op="Process Documents" from_port="word list" to_op="WordList to Data" to_port="word list"/>
    <connect from_op="WordList to Data" from_port="example set" to_port="output 1"/>
    <portSpacing port="source_file object" spacing="0"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
    <portSpacing port="sink_output 2" spacing="0"/>
  </process>
</operator>
<!-- Union Append: subprocess that receives a collection on "in 1", walks it
     with a collection loop that accumulates a running union of the elements
     through a Remember/Recall store named "LoopData", and finally selects one
     element of the loop output as "out 1". -->
<operator activated="true" class="subprocess" compatibility="9.5.001" expanded="true" height="82" name="Union Append" width="90" x="246" y="34">
<process expanded="true">
<!-- Output (4): iterates over the incoming collection and exposes the
     iteration counter as macro "iteration", starting at 1. -->
<operator activated="true" class="loop_collection" compatibility="9.5.001" expanded="true" height="82" name="Output (4)" width="90" x="45" y="34">
<parameter key="set_iteration_macro" value="true"/>
<parameter key="macro_name" value="iteration"/>
<parameter key="macro_start_value" value="1"/>
<parameter key="unfold" value="false"/>
<process expanded="true">
<!-- Select (5) is deactivated (activated="false") and has no connections in
     this process; it still carries an "after" breakpoint. Looks like a
     debugging leftover. -->
<operator activated="false" breakpoints="after" class="select" compatibility="9.5.001" expanded="true" height="68" name="Select (5)" width="90" x="112" y="34">
<parameter key="index" value="%{iteration}"/>
<parameter key="unfold" value="false"/>
</operator>
<!-- Branch (2): decides on the expression %{iteration}==1. It contains two
     inner processes; presumably the first is the "then" branch and the second
     the "else" branch (confirm against the Branch operator documentation). -->
<operator activated="true" class="branch" compatibility="9.5.001" expanded="true" height="82" name="Branch (2)" width="90" x="313" y="34">
<parameter key="condition_type" value="expression"/>
<parameter key="expression" value="%{iteration}==1"/>
<parameter key="io_object" value="ANOVAMatrix"/>
<parameter key="return_inner_output" value="true"/>
<!-- First inner process: passes the object on the "condition" port straight
     through to "input 1" without modification. -->
<process expanded="true">
<connect from_port="condition" to_port="input 1"/>
<portSpacing port="source_condition" spacing="0"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_input 1" spacing="0"/>
<portSpacing port="sink_input 2" spacing="0"/>
</process>
<!-- Second inner process: recalls the stored "LoopData" ExampleSet (removing
     it from the store) and unions it with the object on the "condition"
     port; the union is returned on "input 1". -->
<process expanded="true">
<operator activated="true" class="recall" compatibility="9.5.001" expanded="true" height="68" name="Recall (5)" width="90" x="45" y="187">
<parameter key="name" value="LoopData"/>
<parameter key="io_object" value="ExampleSet"/>
<parameter key="remove_from_store" value="true"/>
</operator>
<operator activated="true" class="union" compatibility="9.5.001" expanded="true" height="82" name="Union (2)" width="90" x="179" y="34"/>
<connect from_port="condition" to_op="Union (2)" to_port="example set 1"/>
<connect from_op="Recall (5)" from_port="result" to_op="Union (2)" to_port="example set 2"/>
<connect from_op="Union (2)" from_port="union" to_port="input 1"/>
<portSpacing port="source_condition" spacing="0"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_input 1" spacing="0"/>
<portSpacing port="sink_input 2" spacing="0"/>
</process>
</operator>
<!-- Remember (5): stores the branch result under the name "LoopData" so the
     next iteration can recall it, and forwards it on the "stored" port. -->
<operator activated="true" class="remember" compatibility="9.5.001" expanded="true" height="68" name="Remember (5)" width="90" x="581" y="34">
<parameter key="name" value="LoopData"/>
<parameter key="io_object" value="ExampleSet"/>
<parameter key="store_which" value="1"/>
<parameter key="remove_from_process" value="true"/>
</operator>
<connect from_port="single" to_op="Branch (2)" to_port="condition"/>
<connect from_op="Branch (2)" from_port="input 1" to_op="Remember (5)" to_port="store"/>
<connect from_op="Remember (5)" from_port="stored" to_port="output 1"/>
<portSpacing port="source_single" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<!-- Select (6): picks element number %{iteration} from the loop output.
     NOTE(review): this runs after the loop, so it relies on whatever value the
     "iteration" macro holds once the loop has finished; presumably the intent
     is to pick the last, fully accumulated union. Confirm the macro value. -->
<operator activated="true" class="select" compatibility="9.5.001" expanded="true" height="68" name="Select (6)" width="90" x="179" y="34">
<parameter key="index" value="%{iteration}"/>
<parameter key="unfold" value="false"/>
</operator>
<connect from_port="in 1" to_op="Output (4)" to_port="collection"/>
<connect from_op="Output (4)" from_port="output 1" to_op="Select (6)" to_port="collection"/>
<connect from_op="Select (6)" from_port="selected" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Open WordNet Dictionary: opens the WordNet dictionary stored in a local
     directory; the result is available on the operator's "dictionary" port. -->
<operator
    name="Open WordNet Dictionary"
    class="wordnet:open_wordnet_dictionary"
    compatibility="5.3.000"
    activated="true"
    expanded="true"
    x="380"
    y="136"
    width="90"
    height="68">
  <!-- The dictionary is read from a directory on disk. -->
  <parameter key="resource_type" value="directory"/>
  <parameter key="directory" value="C:\Users\Kevin\Desktop\WordNet-3.0\dict"/>
</operator>
<!-- LDAEvaluationWordNet: evaluation operator from the corpus linguistics LDA
     extension. It runs 2000 iterations with the WuPalmer method and reads
     WordNet from the directory given in "word net source files". -->
<operator activated="true" class="corpus_linguistics_plugin_LDA:LDAEvaluationWordNet" compatibility="1.1.001" expanded="true" height="82" name="LDAEvaluationWordNet" width="90" x="380" y="34">
  <parameter key="iterations" value="2000"/>
  <!-- Points at the local WordNet "dict" directory, the same one the Open
       WordNet Dictionary operator uses. The previous value,
       /home/poelitz/Downloads/WordNet-2.1/dict, was a Linux path on another
       machine and cannot exist on this Windows setup.
       NOTE(review): the previous path named WordNet 2.1 while the local copy
       is WordNet 3.0; confirm the extension accepts the 3.0 files. -->
  <parameter key="word net source files" value="C:\Users\Kevin\Desktop\WordNet-3.0\dict"/>
  <parameter key="method" value="WuPalmer"/>
</operator>
<!-- Top-level wiring: the per-file word lists from Loop Files (2) are merged
     by Union Append and handed to LDAEvaluationWordNet, whose
     "output neg log likelihoods" port is the process result. -->
<connect from_op="Loop Files (2)" from_port="output 1" to_op="Union Append" to_port="in 1"/>
<connect from_op="Union Append" from_port="out 1" to_op="LDAEvaluationWordNet" to_port="example set input"/>
<!-- NOTE(review): this connection feeds the "dictionary" output of Open
     WordNet Dictionary into a port named "example set words". The port name
     suggests it expects an ExampleSet rather than a dictionary object, so this
     is most likely the connection behind the "wrong type of data" message.
     Confirm the expected input with the extension author and rewire. -->
<connect from_op="Open WordNet Dictionary" from_port="dictionary" to_op="LDAEvaluationWordNet" to_port="example set words"/>
<connect from_op="LDAEvaluationWordNet" from_port="output neg log likelihoods" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
0
Best Answer
-
sgenzer Administrator, Moderator, Employee-RapidMiner, RapidMiner Certified Analyst, Community Manager, Member, University Professor, PM Moderator Posts: 2,959 Community Manager
Hi @Ka13n, hmm, the operator causing you trouble is from the "Corpus Linguistics LDA" extension out of TU Dortmund. To be honest, I had never heard of it until this post. I would recommend going to the website and asking the author about your problem. His email is posted: https://www-ai.cs.tu-dortmund.de/PERSONAL/poelitz.html
Scott5
Answers
Thanks for your kind reply. I'll send him an email to ask for a solution!