7 #include "PlusConfigure.h" 9 #include "igsioCommon.h" 11 #include "vtkObjectFactory.h" 14 #include "vtkIGSIOTrackedFrameList.h" 18 #include <tesseract/baseapi.h> 19 #include <tesseract/strngs.h> 20 #include <allheaders.h> 30 static const char* PARAMETER_LIST_TAG_NAME =
"TextFields";
// Name of a single field element; compared (via STRCASECMP) against the name of
// each nested element of the field list when the configuration is read.
31 static const char* PARAMETER_TAG_NAME =
"Field";
// Attribute holding the field's name; stored in TextFieldParameter::ParameterName
// on read and written back from it when the configuration is saved.
32 static const char* PARAMETER_NAME_ATTRIBUTE =
"Name";
// Attribute holding the source channel name of a field; written back from
// SourceChannel->GetChannelId() when the configuration is saved.
33 static const char* PARAMETER_CHANNEL_ATTRIBUTE =
"Channel";
// Attribute read as a 2-component integer vector into the field's Origin.
// A negative component is reported as an invalid definition.
34 static const char* PARAMETER_ORIGIN_ATTRIBUTE =
"InputRegionOrigin";
// Attribute read as a 2-component integer vector into the field's Size.
// A negative component is reported as an invalid definition. The size is also
// used to dimension the per-field pix buffer and the ScreenRegion extent.
35 static const char* PARAMETER_SIZE_ATTRIBUTE =
"InputRegionSize";
// Passed as the depth argument to pixCreate() when allocating a field's
// ReceivedFrame buffer.
36 static const int PARAMETER_DEPTH_BITS = 8;
// Language set before the optional Language attribute is read; the attribute is
// only written out when the current language differs from this value.
37 static const char* DEFAULT_LANGUAGE =
"eng";
// NOTE(review): not referenced in the visible portion of this file; presumably
// the default for MissingInputGracePeriodSec, in seconds - confirm.
38 static const int TEXT_RECOGNIZER_MISSING_INPUT_DEFAULT = 1;
45 , TrackedFrames(vtkIGSIOTrackedFrameList::New())
58 for (
FieldListIterator fieldIt = it->second.begin(); fieldIt != it->second.end(); ++fieldIt)
60 TextFieldParameter* parameter = *fieldIt;
81 #ifdef PLUS_TEST_TextRecognizer 92 std::map<double, int> queriedFramesIndexes;
93 std::vector<igsioTrackedFrame*> queriedFrames;
102 for (
FieldListIterator fieldIt = it->second.begin(); fieldIt != it->second.end(); ++fieldIt)
104 TextFieldParameter* parameter = *fieldIt;
105 igsioTrackedFrame frame;
110 if (result !=
PLUS_SUCCESS || frame.GetImageData()->GetImage() == NULL)
120 std::string textStr(text_out);
121 parameter->LatestParameterValue = igsioCommon::Trim(textStr);
124 frame.SetFrameField(parameter->ParameterName, parameter->LatestParameterValue);
129 igsioFieldMapType fieldMap;
132 for (
FieldListIterator fieldIt = it->second.begin(); fieldIt != it->second.end(); ++fieldIt)
134 fieldMap[(*fieldIt)->ParameterName].first = FRAMEFIELD_NONE;
135 fieldMap[(*fieldIt)->ParameterName].second = (*fieldIt)->LatestParameterValue;
151 igsioVideoFrame::GetOrientedClippedImage(frame.GetImageData()->GetImage(),
152 igsioVideoFrame::FlipInfoType(),
153 frame.GetImageData()->GetImageType(),
154 parameter->ScreenRegion,
158 unsigned int*
data = pixGetData(parameter->ReceivedFrame);
159 int wpl = pixGetWpl(parameter->ReceivedFrame);
160 int bpl = ((8 * parameter->Size[0]) + 7) / 8;
165 parameter->ScreenRegion->GetExtent(extents);
166 int ySize = extents[3] - extents[2];
168 int coords[3] = {0, 0, 0};
171 coords[1] =
ySize -
y - 1 + extents[2];
173 for (
int x = 0;
x < bpl;
x++)
175 coords[0] =
x + extents[0];
176 val8 = (*(
unsigned char*)parameter->ScreenRegion->GetScalarPointer(coords));
177 SET_DATA_BYTE(
line,
x, val8);
185 double mostRecent(-1);
187 if (!parameter->SourceChannel->GetVideoDataAvailable())
189 LOG_WARNING(
"Processed data is not generated, as no video data is available yet. Device ID: " << this->
GetDeviceId());
193 if (parameter->SourceChannel->GetMostRecentTimestamp(mostRecent) !=
PLUS_SUCCESS)
195 LOG_ERROR(
"Unable to retrieve most recent timestamp for parameter " << parameter->ParameterName);
200 std::map<double, int>::iterator frameIt = QueriedFramesIndexes.find(mostRecent);
203 if (frameIt == QueriedFramesIndexes.end())
206 double aTimestamp(UNDEFINED_TIMESTAMP);
207 if (parameter->SourceChannel->GetTrackedFrameList(aTimestamp, this->TrackedFrames, 1) !=
PLUS_SUCCESS)
209 LOG_INFO(
"Failed to get tracked frame list from data collector.");
218 QueriedFramesIndexes[
timestamp] = QueriedFrames.size();
219 QueriedFrames.push_back(&frame);
223 frame = (*QueriedFrames[frameIt->second]);
239 std::stringstream ss;
241 vtksys::SystemTools::PutEnv(ss.str());
242 LOG_DEBUG(
"Using tessdata directory: " << this->TessdataDirectory);
245 if (this->
TesseractAPI->Init(NULL,
Language.c_str(), tesseract::OEM_TESSERACT_CUBE_COMBINED) != 0)
247 LOG_ERROR(
"Unable to init tesseract library. Cannot perform text recognition.");
250 this->
TesseractAPI->SetPageSegMode(tesseract::PSM_SINGLE_LINE);
272 if (deviceConfig == NULL)
274 LOG_ERROR(
"Unable to continue configuration of " << this->
GetClassName() <<
". Could not find corresponding element.");
282 LOG_WARNING(
"MissingInputGracePeriodSec must be set to a value > 1s to allow input to arrive and be processed.");
286 this->SetLanguage(DEFAULT_LANGUAGE);
287 XML_READ_CSTRING_ATTRIBUTE_OPTIONAL(
Language, deviceConfig);
290 XML_FIND_NESTED_ELEMENT_OPTIONAL(screenFields, deviceConfig, PARAMETER_LIST_TAG_NAME);
292 for (
int i = 0;
i < screenFields->GetNumberOfNestedElements(); ++
i)
294 vtkXMLDataElement* fieldElement = screenFields->GetNestedElement(
i);
296 if (STRCASECMP(fieldElement->GetName(), PARAMETER_TAG_NAME) != 0)
301 const char*
channelName = fieldElement->GetAttribute(PARAMETER_CHANNEL_ATTRIBUTE);
305 LOG_ERROR(
"Cannot build field scanner. Input " << PARAMETER_CHANNEL_ATTRIBUTE <<
" is not defined or invalid " << PARAMETER_CHANNEL_ATTRIBUTE <<
" name specified.");
309 if (fieldElement->GetAttribute(PARAMETER_NAME_ATTRIBUTE) == NULL)
311 LOG_ERROR(
"Parameter " << PARAMETER_NAME_ATTRIBUTE <<
" not defined. Unable to build field scanner.");
315 int origin[2] = {-1, -1};
316 int size[2] = {-1, -1};
317 fieldElement->GetVectorAttribute(PARAMETER_ORIGIN_ATTRIBUTE, 2, origin);
318 fieldElement->GetVectorAttribute(PARAMETER_SIZE_ATTRIBUTE, 2, size);
319 if (origin[0] < 0 || origin[1] < 0 || size[0] < 0 || size[1] < 0)
321 LOG_ERROR(
"Invalid definition for " << PARAMETER_ORIGIN_ATTRIBUTE <<
" and " << PARAMETER_SIZE_ATTRIBUTE <<
". Unable to build field scanner.");
325 TextFieldParameter* parameter =
new TextFieldParameter();
326 parameter->ParameterName = std::string(fieldElement->GetAttribute(PARAMETER_NAME_ATTRIBUTE));
327 parameter->SourceChannel = aChannel;
328 parameter->Origin[0] = origin[0];
329 parameter->Origin[1] = origin[1];
330 parameter->Size[0] = size[0];
331 parameter->Size[1] = size[1];
332 parameter->ReceivedFrame = pixCreate(parameter->Size[0], parameter->Size[1], PARAMETER_DEPTH_BITS);
333 parameter->ScreenRegion = vtkSmartPointer<vtkImageData>::New();
334 parameter->ScreenRegion->SetExtent(0, size[0] - 1, 0, size[1] - 1, 0, 0);
335 parameter->ScreenRegion->AllocateScalars(VTK_UNSIGNED_CHAR, 1);
348 if (!igsioCommon::IsEqualInsensitive(this->
Language, DEFAULT_LANGUAGE))
350 XML_WRITE_STRING_ATTRIBUTE_IF_NOT_EMPTY(
Language, deviceConfig);
353 XML_FIND_NESTED_ELEMENT_CREATE_IF_MISSING(screenFields, deviceConfig, PARAMETER_LIST_TAG_NAME);
357 for (
FieldListIterator fieldIt = it->second.begin(); fieldIt != it->second.end(); ++fieldIt)
359 TextFieldParameter* parameter = *fieldIt;
361 XML_FIND_NESTED_ELEMENT_CREATE_IF_MISSING(fieldElement, screenFields, PARAMETER_TAG_NAME);
363 fieldElement->SetAttribute(PARAMETER_CHANNEL_ATTRIBUTE, parameter->SourceChannel->GetChannelId());
364 fieldElement->SetAttribute(PARAMETER_NAME_ATTRIBUTE, parameter->ParameterName.c_str());
365 fieldElement->SetVectorAttribute(PARAMETER_ORIGIN_ATTRIBUTE, 2, parameter->Origin.data());
366 fieldElement->SetVectorAttribute(PARAMETER_SIZE_ATTRIBUTE, 2, parameter->Size.data());
378 LOG_ERROR(
"Screen reader needs at least one input image to analyze. Please add an input channel with video data.");
384 LOG_ERROR(
"Input channel does not have a video source. Need video to analyze.");
390 LOG_ERROR(
"No output channels defined. Recognizer needs an output channel to send text.");
396 LOG_ERROR(
"Screen reader has no fields defined. There's nothing for me to do!");
405 LOG_ERROR(
"Screen reader requires an output channel.");
411 LOG_ERROR(
"Screen reader requires an output channel with at least one field data source defined.");
static const int VIRTUAL_DEVICE_FRAME_RATE
virtual void PrintSelf(ostream &os, vtkIndent indent) VTK_OVERRIDE
Abstract interface for tracker and video devices.
#define XML_FIND_DEVICE_ELEMENT_REQUIRED_FOR_WRITING(deviceConfig, rootConfigElement)
std::string TessdataDirectory
ChannelFieldListMap::iterator ChannelFieldListMapIterator
vtkPlusChannel * OutputChannel
Optional output channel to store recognized fields for broadcasting.
ChannelContainer InputChannels
DataSourceContainerIterator GetFieldDataSourcesStartIterator()
virtual std::string GetDeviceId() const
vtkIGSIOTrackedFrameList * TrackedFrames
virtual PlusStatus ReadConfiguration(vtkXMLDataElement *)
virtual PlusStatus InternalConnect()
virtual PlusStatus NotifyConfigured()
virtual vtkPlusDataCollector * GetDataCollector()
static vtkPlusConfig * GetInstance()
bool GetFieldDataEnabled() const
virtual PlusStatus WriteConfiguration(vtkXMLDataElement *)
virtual void PrintSelf(ostream &os, vtkIndent indent) VTK_OVERRIDE
virtual PlusStatus ReadConfiguration(vtkXMLDataElement *)
double MissingInputGracePeriodSec
virtual ~vtkPlusVirtualTextRecognizer()
unsigned long FrameNumber
vtkStandardNewMacro(vtkPlusVirtualTextRecognizer)
virtual const char * GetClassName()
vtkXMLDataElement * FindThisDeviceElement(vtkXMLDataElement *rootXMLElement)
vtkPlusVirtualTextRecognizer()
virtual PlusStatus InternalDisconnect()
PlusStatus FindOrQueryFrame(igsioTrackedFrame &frame, std::map< double, int > &queriedFramesIndexes, TextFieldParameter *parameter, std::vector< igsioTrackedFrame * > &queriedFrames)
If a frame has been queried for this input channel, reuse it instead of getting a new one.
DataSourceContainer::iterator DataSourceContainerIterator
void vtkImageDataToPix(igsioTrackedFrame &frame, TextFieldParameter *parameter)
Convert a vtkImageData to Leptonica Pix format.
bool StartThreadForInternalUpdates
FieldList::iterator FieldListIterator
bool HasGracePeriodExpired()
const char ** channelName
Contains an optional timestamped circular buffer containing the video images and a number of timestam...
ChannelContainer OutputChannels
Direction vectors of rods y
ChannelFieldListMap RecognitionFields
Map of channels to fields so that we only have to grab an image once from each source channel.
void ClearConfiguration()
Remove any configuration data.
tesseract::TessBaseAPI * TesseractAPI
Main entry point for the tesseract API.
std::map< vtkPlusChannel *, FieldList > ChannelFieldListMap
DataSourceContainerIterator GetFieldDataSourcesEndIterator()
virtual PlusStatus InternalUpdate()
std::string Language
Language used for detection.