The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
Image Mining
jameshickman
Member Posts: 2 Contributor I
I have several thousand spare parts each of which has multiple photographs taken from various angles.
I want to build a model to identify which spare part is pictured in a new photograph.
In data mining terms I want to:
Extract features from the images
Train a learner to categorize the images based on the features
If I use the global features extraction I get good results on a limited sample of training images with this process:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process: extract global image features from the listed image
     folders and cross-validate a Random Forest on the resulting example set. -->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <parameter key="parallelize_main_process" value="true"/>
    <process expanded="true">
      <!-- Opens every folder in the "images" list. assign_label is enabled;
           presumably the list key is used as the label of the images in that
           folder (confirm against the operator help). -->
      <operator activated="true" class="imageprocessing:multiple_color_image_opener" compatibility="1.4.001" expanded="true" height="60" name="MCIO" width="90" x="45" y="30">
        <list key="images">
          <parameter key="S001071" value="D:\Rotationals\S001071"/>
          <parameter key="S001079" value="D:\Rotationals\S001079"/>
          <parameter key="S001128" value="D:\Rotationals\S001128"/>
          <parameter key="S001129" value="D:\Rotationals\S001129"/>
          <parameter key="S001496" value="D:\Rotationals\S001496"/>
          <parameter key="S001527" value="D:\Rotationals\S001527"/>
          <parameter key="S001532" value="D:\Rotationals\S001532"/>
          <parameter key="S002047" value="D:\Rotationals\S002047"/>
          <parameter key="S002443" value="D:\Rotationals\S002443"/>
          <parameter key="S002518" value="D:\Rotationals\S002518"/>
        </list>
        <parameter key="assign_label" value="true"/>
        <parameter key="parallelize_executed_process" value="true"/>
        <process expanded="true">
          <operator activated="true" class="imageprocessing:global_feature_extraction" compatibility="1.4.001" expanded="true" height="60" name="Global Feature Extractor from a Single Image" width="90" x="179" y="30">
            <process expanded="true">
              <!-- Two feature branches: global statistics on the colour image
                   (feature 1) and OBCF on its grayscale conversion (feature 2). -->
              <operator activated="true" class="imageprocessing:statistics" compatibility="1.4.001" expanded="true" height="60" name="Global statistics" width="90" x="179" y="30"/>
              <operator activated="true" class="imageprocessing:color_to_grayscale" compatibility="1.4.001" expanded="true" height="60" name="Color to grayscale" width="90" x="179" y="165"/>
              <operator activated="true" class="imageprocessing:obcf" compatibility="1.4.001" expanded="true" height="60" name="OBCF" width="90" x="380" y="165"/>
              <connect from_port="color image plus 1" to_op="Global statistics" to_port="color image plus"/>
              <connect from_port="color image plus 2" to_op="Color to grayscale" to_port="color image plus"/>
              <connect from_op="Global statistics" from_port="features" to_port="feature 1"/>
              <connect from_op="Color to grayscale" from_port="grayscale image" to_op="OBCF" to_port="grayscale image plus"/>
              <connect from_op="OBCF" from_port="features" to_port="feature 2"/>
              <portSpacing port="source_color image plus 1" spacing="0"/>
              <portSpacing port="source_color image plus 2" spacing="0"/>
              <portSpacing port="source_color image plus 3" spacing="0"/>
              <portSpacing port="sink_feature 1" spacing="0"/>
              <portSpacing port="sink_feature 2" spacing="0"/>
              <portSpacing port="sink_feature 3" spacing="0"/>
            </process>
          </operator>
          <connect from_port="color image plus" to_op="Global Feature Extractor from a Single Image" to_port="color image plus"/>
          <connect from_op="Global Feature Extractor from a Single Image" from_port="example set" to_port="Example set"/>
          <portSpacing port="source_color image plus" spacing="0"/>
          <portSpacing port="sink_Example set" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="x_validation" compatibility="5.1.002" expanded="true" height="112" name="Validation" width="90" x="313" y="30">
        <!-- Description corrected: the learner in the training subprocess is
             Random Forest, not a single decision tree. -->
        <description>A cross-validation evaluating a random forest model.</description>
        <parameter key="parallelize_training" value="true"/>
        <parameter key="parallelize_testing" value="true"/>
        <!-- Training subprocess: fit a Random Forest with 20 trees. -->
        <process expanded="true">
          <operator activated="true" class="random_forest" compatibility="5.3.015" expanded="true" height="76" name="Random Forest" width="90" x="84" y="30">
            <parameter key="number_of_trees" value="20"/>
          </operator>
          <connect from_port="training" to_op="Random Forest" to_port="training set"/>
          <connect from_op="Random Forest" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess: apply the model to the test set and measure performance. -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="5.3.015" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" compatibility="5.3.015" expanded="true" height="76" name="Performance" width="90" x="179" y="30"/>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: result 1 is the performance, result 2 is the model. -->
      <connect from_op="MCIO" from_port="example set" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_port="result 2"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
But I think it will struggle when I expand the number of objects and the real-world image data comes in with wide variations in background colour, perspective, illumination, etc.
Having read a little about object recognition and image processing, it would seem that more promising methods of feature extraction for this type of task are SIFT and SURF.
I can't find any RapidMiner operator for SIFT, but the Image Mining extension has an operator called descriptor_surf under Feature Extraction → Local Features.
My problem is that when I run this operator, all the output attributes V0 to V63 are zero! This is the process:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process: open one colour image, convert it to grayscale and
     run descriptor_surf on the grayscale image. -->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <parameter key="parallelize_main_process" value="true"/>
    <process expanded="true">
      <operator activated="true" class="imageprocessing:open_color_image" compatibility="1.4.001" expanded="true" height="60" name="Open Color Image" width="90" x="45" y="210">
        <parameter key="filename" value="D:\Rotationals\S002443\S002443_0003.png"/>
        <parameter key="set_mask" value="false"/>
        <parameter key="force_conversion" value="false"/>
      </operator>
      <operator activated="true" class="imageprocessing:color_to_grayscale" compatibility="1.4.001" expanded="true" height="60" name="Color to grayscale (2)" width="90" x="179" y="210"/>
      <operator activated="true" class="imageprocessing:descriptor_surf" compatibility="1.4.001" expanded="true" height="60" name="descriptor_surf" width="90" x="313" y="210"/>
      <!-- Linear chain: image, then grayscale, then descriptor_surf, then result 1. -->
      <connect from_op="Open Color Image" from_port="color image plus" to_op="Color to grayscale (2)" to_port="color image plus"/>
      <connect from_op="Color to grayscale (2)" from_port="grayscale image" to_op="descriptor_surf" to_port="image"/>
      <connect from_op="descriptor_surf" from_port="features" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
1) Am I correct in thinking that descriptor_surf is the operator I need?
2) Why are all the values zero? Do I need to pre-process the images or do some other conversion before running it?
Thanks,
James
I want to build a model to identify which spare part is pictured in a new photograph.
In data mining terms I want to:
Extract features from the images
Train a learner to categorize the images based on the features
If I use the global features extraction I get good results on a limited sample of training images with this process:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process: extract global image features from the listed image
     folders and cross-validate a Random Forest on the resulting example set. -->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <parameter key="parallelize_main_process" value="true"/>
    <process expanded="true">
      <!-- Opens every folder in the "images" list. assign_label is enabled;
           presumably the list key is used as the label of the images in that
           folder (confirm against the operator help). -->
      <operator activated="true" class="imageprocessing:multiple_color_image_opener" compatibility="1.4.001" expanded="true" height="60" name="MCIO" width="90" x="45" y="30">
        <list key="images">
          <parameter key="S001071" value="D:\Rotationals\S001071"/>
          <parameter key="S001079" value="D:\Rotationals\S001079"/>
          <parameter key="S001128" value="D:\Rotationals\S001128"/>
          <parameter key="S001129" value="D:\Rotationals\S001129"/>
          <parameter key="S001496" value="D:\Rotationals\S001496"/>
          <parameter key="S001527" value="D:\Rotationals\S001527"/>
          <parameter key="S001532" value="D:\Rotationals\S001532"/>
          <parameter key="S002047" value="D:\Rotationals\S002047"/>
          <parameter key="S002443" value="D:\Rotationals\S002443"/>
          <parameter key="S002518" value="D:\Rotationals\S002518"/>
        </list>
        <parameter key="assign_label" value="true"/>
        <parameter key="parallelize_executed_process" value="true"/>
        <process expanded="true">
          <operator activated="true" class="imageprocessing:global_feature_extraction" compatibility="1.4.001" expanded="true" height="60" name="Global Feature Extractor from a Single Image" width="90" x="179" y="30">
            <process expanded="true">
              <!-- Two feature branches: global statistics on the colour image
                   (feature 1) and OBCF on its grayscale conversion (feature 2). -->
              <operator activated="true" class="imageprocessing:statistics" compatibility="1.4.001" expanded="true" height="60" name="Global statistics" width="90" x="179" y="30"/>
              <operator activated="true" class="imageprocessing:color_to_grayscale" compatibility="1.4.001" expanded="true" height="60" name="Color to grayscale" width="90" x="179" y="165"/>
              <operator activated="true" class="imageprocessing:obcf" compatibility="1.4.001" expanded="true" height="60" name="OBCF" width="90" x="380" y="165"/>
              <connect from_port="color image plus 1" to_op="Global statistics" to_port="color image plus"/>
              <connect from_port="color image plus 2" to_op="Color to grayscale" to_port="color image plus"/>
              <connect from_op="Global statistics" from_port="features" to_port="feature 1"/>
              <connect from_op="Color to grayscale" from_port="grayscale image" to_op="OBCF" to_port="grayscale image plus"/>
              <connect from_op="OBCF" from_port="features" to_port="feature 2"/>
              <portSpacing port="source_color image plus 1" spacing="0"/>
              <portSpacing port="source_color image plus 2" spacing="0"/>
              <portSpacing port="source_color image plus 3" spacing="0"/>
              <portSpacing port="sink_feature 1" spacing="0"/>
              <portSpacing port="sink_feature 2" spacing="0"/>
              <portSpacing port="sink_feature 3" spacing="0"/>
            </process>
          </operator>
          <connect from_port="color image plus" to_op="Global Feature Extractor from a Single Image" to_port="color image plus"/>
          <connect from_op="Global Feature Extractor from a Single Image" from_port="example set" to_port="Example set"/>
          <portSpacing port="source_color image plus" spacing="0"/>
          <portSpacing port="sink_Example set" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="x_validation" compatibility="5.1.002" expanded="true" height="112" name="Validation" width="90" x="313" y="30">
        <!-- Description corrected: the learner in the training subprocess is
             Random Forest, not a single decision tree. -->
        <description>A cross-validation evaluating a random forest model.</description>
        <parameter key="parallelize_training" value="true"/>
        <parameter key="parallelize_testing" value="true"/>
        <!-- Training subprocess: fit a Random Forest with 20 trees. -->
        <process expanded="true">
          <operator activated="true" class="random_forest" compatibility="5.3.015" expanded="true" height="76" name="Random Forest" width="90" x="84" y="30">
            <parameter key="number_of_trees" value="20"/>
          </operator>
          <connect from_port="training" to_op="Random Forest" to_port="training set"/>
          <connect from_op="Random Forest" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess: apply the model to the test set and measure performance. -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="5.3.015" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" compatibility="5.3.015" expanded="true" height="76" name="Performance" width="90" x="179" y="30"/>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: result 1 is the performance, result 2 is the model. -->
      <connect from_op="MCIO" from_port="example set" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_port="result 2"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
But I think it will struggle when I expand the number of objects and the real-world image data comes in with wide variations in background colour, perspective, illumination, etc.
Having read a little about object recognition and image processing, it would seem that more promising methods of feature extraction for this type of task are SIFT and SURF.
I can't find any RapidMiner operator for SIFT, but the Image Mining extension has an operator called descriptor_surf under Feature Extraction → Local Features.
My problem is that when I run this operator, all the output attributes V0 to V63 are zero! This is the process:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process: open one colour image, convert it to grayscale and
     run descriptor_surf on the grayscale image. -->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <parameter key="parallelize_main_process" value="true"/>
    <process expanded="true">
      <operator activated="true" class="imageprocessing:open_color_image" compatibility="1.4.001" expanded="true" height="60" name="Open Color Image" width="90" x="45" y="210">
        <parameter key="filename" value="D:\Rotationals\S002443\S002443_0003.png"/>
        <parameter key="set_mask" value="false"/>
        <parameter key="force_conversion" value="false"/>
      </operator>
      <operator activated="true" class="imageprocessing:color_to_grayscale" compatibility="1.4.001" expanded="true" height="60" name="Color to grayscale (2)" width="90" x="179" y="210"/>
      <operator activated="true" class="imageprocessing:descriptor_surf" compatibility="1.4.001" expanded="true" height="60" name="descriptor_surf" width="90" x="313" y="210"/>
      <!-- Linear chain: image, then grayscale, then descriptor_surf, then result 1. -->
      <connect from_op="Open Color Image" from_port="color image plus" to_op="Color to grayscale (2)" to_port="color image plus"/>
      <connect from_op="Color to grayscale (2)" from_port="grayscale image" to_op="descriptor_surf" to_port="image"/>
      <connect from_op="descriptor_surf" from_port="features" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
1) Am I correct in thinking that descriptor_surf is the operator I need?
2) Why are all the values zero? Do I need to pre-process the images or do some other conversion before running it?
Thanks,
James
0
Answers
Best,
Vaclav
In the SIFT approach the interest points are detected using the DoG (Difference of Gaussians) detector and the region descriptor is a histogram of gradients (HoG) or alternatively CCH (Contrast Context Histograms).
I can't find a DoG interest point detector, but I can identify points of interest (POIs) with the Harris or Hessian detectors.
I can extract local features, but none of the local feature extraction operators seem to correspond to HoG or CCH (although there is a HoG Detector operator).
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process: detect interest points on the grayscale version of one
     image, then extract five local features around those points. -->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <parameter key="parallelize_main_process" value="true"/>
    <process expanded="true">
      <operator activated="true" class="imageprocessing:open_color_image" compatibility="1.4.001" expanded="true" height="60" name="Open Color Image" width="90" x="45" y="120">
        <parameter key="filename" value="D:\Rotationals\S001532\S001532_0005.png"/>
        <parameter key="set_mask" value="false"/>
        <parameter key="force_conversion" value="false"/>
      </operator>
      <!-- Splits the colour image: output 1 feeds the grayscale conversion,
           output 2 feeds the local feature extractor directly. -->
      <operator activated="true" class="multiply" compatibility="5.3.015" expanded="true" height="94" name="Multiply (2)" width="90" x="45" y="300"/>
      <operator activated="true" class="imageprocessing:color_to_grayscale" compatibility="1.4.001" expanded="true" height="60" name="Color to grayscale" width="90" x="179" y="210"/>
      <!-- Only output 1 of this Multiply is wired (to the interest point detector). -->
      <operator activated="true" class="multiply" compatibility="5.3.015" expanded="true" height="76" name="Multiply" width="90" x="380" y="255"/>
      <operator activated="true" class="imageprocessing:interest_point_detector" compatibility="1.4.001" expanded="true" height="60" name="Interest point detector" width="90" x="246" y="30">
        <parameter key="num_of_features" value="100"/>
      </operator>
      <operator activated="true" class="imageprocessing:feature_extraction_operator2" compatibility="1.4.001" expanded="true" height="94" name="Local Feature Extractor with Points" width="90" x="581" y="120">
        <parameter key="parallelize_executed_process" value="true"/>
        <process expanded="true">
          <!-- Inner process: convert to grayscale, fan the image out with
               Multiply (3), and wire each extractor to its own feature sink (1 to 5). -->
          <operator activated="true" class="imageprocessing:color_to_grayscale" compatibility="1.4.001" expanded="true" height="60" name="Color to grayscale (2)" width="90" x="45" y="30"/>
          <operator activated="true" class="multiply" compatibility="5.3.015" expanded="true" height="148" name="Multiply (3)" width="90" x="246" y="30"/>
          <operator activated="true" class="imageprocessing:line_haar_like_feature" compatibility="1.4.001" expanded="true" height="60" name="Line Haar-like feature" width="90" x="447" y="210"/>
          <operator activated="true" class="imageprocessing:circle_pixels_extractor" compatibility="1.4.001" expanded="true" height="60" name="CPE" width="90" x="447" y="120"/>
          <operator activated="true" class="imageprocessing:diagonal_haar_like_feature" compatibility="1.4.001" expanded="true" height="60" name="Diagonal Haar-like feature" width="90" x="447" y="165"/>
          <operator activated="true" class="imageprocessing:bvlc" compatibility="1.4.001" expanded="true" height="60" name="BVLC" width="90" x="447" y="75"/>
          <operator activated="true" class="imageprocessing:contrast_of_gray_level_values" compatibility="1.4.001" expanded="true" height="60" name="contrast_of_gray_level_values" width="90" x="447" y="30"/>
          <connect from_port="image 1" to_op="Color to grayscale (2)" to_port="color image plus"/>
          <connect from_op="Color to grayscale (2)" from_port="grayscale image" to_op="Multiply (3)" to_port="input"/>
          <connect from_op="Multiply (3)" from_port="output 1" to_op="contrast_of_gray_level_values" to_port="grayscale image plus"/>
          <connect from_op="Multiply (3)" from_port="output 2" to_op="BVLC" to_port="grayscale image plus"/>
          <connect from_op="Multiply (3)" from_port="output 3" to_op="Diagonal Haar-like feature" to_port="grayscale image plus"/>
          <connect from_op="Multiply (3)" from_port="output 4" to_op="CPE" to_port="grayscale image plus"/>
          <connect from_op="Multiply (3)" from_port="output 5" to_op="Line Haar-like feature" to_port="grayscale image plus"/>
          <connect from_op="Line Haar-like feature" from_port="feature" to_port="feature 5"/>
          <connect from_op="CPE" from_port="feature" to_port="feature 4"/>
          <connect from_op="Diagonal Haar-like feature" from_port="feature" to_port="feature 3"/>
          <connect from_op="BVLC" from_port="feature" to_port="feature 2"/>
          <connect from_op="contrast_of_gray_level_values" from_port="feature" to_port="feature 1"/>
          <portSpacing port="source_image 1" spacing="0"/>
          <portSpacing port="source_image 2" spacing="0"/>
          <portSpacing port="sink_feature 1" spacing="0"/>
          <portSpacing port="sink_feature 2" spacing="0"/>
          <portSpacing port="sink_feature 3" spacing="0"/>
          <portSpacing port="sink_feature 4" spacing="0"/>
          <portSpacing port="sink_feature 5" spacing="0"/>
          <portSpacing port="sink_feature 6" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: the detector's "poi" output supplies the points for
           the extractor, whose example set is delivered as result 1. -->
      <connect from_op="Open Color Image" from_port="color image plus" to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Color to grayscale" to_port="color image plus"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Local Feature Extractor with Points" to_port="color image plus"/>
      <connect from_op="Color to grayscale" from_port="grayscale image" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Interest point detector" to_port="image"/>
      <connect from_op="Interest point detector" from_port="poi" to_op="Local Feature Extractor with Points" to_port="points"/>
      <connect from_op="Local Feature Extractor with Points" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
1) What is the operator equivalent of HoG in local feature extraction?
2) What is the operator equivalent of CCH in local feature extraction?
3) If neither exists, what would be the best choice for a SIFT-like approach?
4) Does the HoG detector operator work on the whole image, or does it extract POIs as part of its process (if so, using which algorithm)?
Thanks for your help,
James