PlusLib  2.9.0
Software library for tracked ultrasound image acquisition, calibration, and processing.
vtkPlusVirtualTextRecognizer.cxx
Go to the documentation of this file.
1 /*=Plus=header=begin======================================================
2 Program: Plus
3 Copyright (c) Laboratory for Percutaneous Surgery. All rights reserved.
4 See License.txt for details.
5 =========================================================Plus=header=end*/
6 
7 #include "PlusConfigure.h"
8 
9 #include "igsioCommon.h"
10 #include "vtkPlusDataCollector.h"
11 #include "vtkObjectFactory.h"
12 #include "vtkPlusChannel.h"
13 #include "vtkPlusDataSource.h"
14 #include "vtkIGSIOTrackedFrameList.h"
16 
17 // Tesseract includes
18 #include <tesseract/baseapi.h>
19 #include <tesseract/strngs.h>
20 #include <allheaders.h>
21 
22 //----------------------------------------------------------------------------
23 
25 
26 //----------------------------------------------------------------------------
27 
// File-local constants: XML element/attribute names and recognizer defaults.
// Everything inside the anonymous namespace already has internal linkage.
namespace
{
  // XML element that groups the text field definitions of this device
  const char* PARAMETER_LIST_TAG_NAME = "TextFields";
  // XML element describing a single text field
  const char* PARAMETER_TAG_NAME = "Field";
  // Attribute holding the name under which the recognized text is stored
  const char* PARAMETER_NAME_ATTRIBUTE = "Name";
  // Attribute naming the channel the field image is read from
  const char* PARAMETER_CHANNEL_ATTRIBUTE = "Channel";
  // Attribute holding the 2D origin of the region to recognize
  const char* PARAMETER_ORIGIN_ATTRIBUTE = "InputRegionOrigin";
  // Attribute holding the 2D size of the region to recognize
  const char* PARAMETER_SIZE_ATTRIBUTE = "InputRegionSize";
  // Bit depth used when allocating the leptonica image of a field
  const int PARAMETER_DEPTH_BITS = 8;
  // Recognition language used when the configuration does not specify one
  const char* DEFAULT_LANGUAGE = "eng";
  // Smallest accepted value for MissingInputGracePeriodSec
  const int TEXT_RECOGNIZER_MISSING_INPUT_DEFAULT = 1;
}
40 
41 //----------------------------------------------------------------------------
43  : vtkPlusDevice()
44  , Language()
45  , TrackedFrames(vtkIGSIOTrackedFrameList::New())
46  , OutputChannel(NULL)
47 {
48  // The data capture thread will be used to regularly check the input devices and generate and update the output
49  this->StartThreadForInternalUpdates = true;
51 }
52 
53 //----------------------------------------------------------------------------
55 {
56  for (ChannelFieldListMapIterator it = this->RecognitionFields.begin(); it != this->RecognitionFields.end(); ++it)
57  {
58  for (FieldListIterator fieldIt = it->second.begin(); fieldIt != it->second.end(); ++fieldIt)
59  {
60  TextFieldParameter* parameter = *fieldIt;
61  delete parameter;
62  }
63  it->second.clear();
64  }
65  this->RecognitionFields.clear();
66 }
67 
68 //----------------------------------------------------------------------------
70 {
71  TrackedFrames->Delete();
72  TrackedFrames = NULL;
73 }
74 
75 //----------------------------------------------------------------------------
76 void vtkPlusVirtualTextRecognizer::PrintSelf(ostream& os, vtkIndent indent)
77 {
78  this->Superclass::PrintSelf(os, indent);
79 }
80 
81 #ifdef PLUS_TEST_TextRecognizer
82 //----------------------------------------------------------------------------
83 vtkPlusVirtualTextRecognizer::ChannelFieldListMap& vtkPlusVirtualTextRecognizer::GetRecognitionFields()
84 {
85  return this->RecognitionFields;
86 }
87 #endif
88 
89 //----------------------------------------------------------------------------
91 {
92  std::map<double, int> queriedFramesIndexes;
93  std::vector<igsioTrackedFrame*> queriedFrames;
94 
95  if (!this->HasGracePeriodExpired())
96  {
97  return PLUS_SUCCESS;
98  }
99 
100  for (ChannelFieldListMapIterator it = this->RecognitionFields.begin(); it != this->RecognitionFields.end(); ++it)
101  {
102  for (FieldListIterator fieldIt = it->second.begin(); fieldIt != it->second.end(); ++fieldIt)
103  {
104  TextFieldParameter* parameter = *fieldIt;
105  igsioTrackedFrame frame;
106 
107  // Attempt to find the frame already retrieved
108  PlusStatus result = FindOrQueryFrame(frame, queriedFramesIndexes, parameter, queriedFrames);
109 
110  if (result != PLUS_SUCCESS || frame.GetImageData()->GetImage() == NULL)
111  {
112  continue;
113  }
114 
115  // We have a frame, let's parse it
116  vtkImageDataToPix(frame, parameter);
117 
118  this->TesseractAPI->SetImage(parameter->ReceivedFrame);
119  char* text_out = this->TesseractAPI->GetUTF8Text();
120  std::string textStr(text_out);
121  parameter->LatestParameterValue = igsioCommon::Trim(textStr);
122  delete [] text_out;
123 
124  frame.SetFrameField(parameter->ParameterName, parameter->LatestParameterValue);
125  }
126  }
127 
128  // Build the field map to send to the data sources
129  igsioFieldMapType fieldMap;
130  for (ChannelFieldListMapIterator it = this->RecognitionFields.begin(); it != this->RecognitionFields.end(); ++it)
131  {
132  for (FieldListIterator fieldIt = it->second.begin(); fieldIt != it->second.end(); ++fieldIt)
133  {
134  fieldMap[(*fieldIt)->ParameterName].first = FRAMEFIELD_NONE;
135  fieldMap[(*fieldIt)->ParameterName].second = (*fieldIt)->LatestParameterValue;
136  }
137  }
138 
140  {
141  it->second->AddItem(fieldMap, this->FrameNumber);
142  }
143  this->FrameNumber++;
144 
145  return PLUS_SUCCESS;
146 }
147 
148 //----------------------------------------------------------------------------
149 void vtkPlusVirtualTextRecognizer::vtkImageDataToPix(igsioTrackedFrame& frame, TextFieldParameter* parameter)
150 {
151  igsioVideoFrame::GetOrientedClippedImage(frame.GetImageData()->GetImage(),
152  igsioVideoFrame::FlipInfoType(),
153  frame.GetImageData()->GetImageType(),
154  parameter->ScreenRegion,
155  parameter->Origin,
156  parameter->Size);
157 
158  unsigned int* data = pixGetData(parameter->ReceivedFrame);
159  int wpl = pixGetWpl(parameter->ReceivedFrame);
160  int bpl = ((8 * parameter->Size[0]) + 7) / 8;
161  unsigned int* line;
162  unsigned char val8;
163 
164  int extents[6];
165  parameter->ScreenRegion->GetExtent(extents);
166  int ySize = extents[3] - extents[2];
167 
168  int coords[3] = {0, 0, 0};
169  for (int y = 0; y < ySize; y++)
170  {
171  coords[1] = ySize - y - 1 + extents[2];
172  line = data + y * wpl;
173  for (int x = 0; x < bpl; x++)
174  {
175  coords[0] = x + extents[0];
176  val8 = (*(unsigned char*)parameter->ScreenRegion->GetScalarPointer(coords));
177  SET_DATA_BYTE(line, x, val8);
178  }
179  }
180 }
181 
182 //----------------------------------------------------------------------------
183 PlusStatus vtkPlusVirtualTextRecognizer::FindOrQueryFrame(igsioTrackedFrame& frame, std::map<double, int>& QueriedFramesIndexes, TextFieldParameter* parameter, std::vector<igsioTrackedFrame*>& QueriedFrames)
184 {
185  double mostRecent(-1);
186 
187  if (!parameter->SourceChannel->GetVideoDataAvailable())
188  {
189  LOG_WARNING("Processed data is not generated, as no video data is available yet. Device ID: " << this->GetDeviceId());
190  return PLUS_FAIL;
191  }
192 
193  if (parameter->SourceChannel->GetMostRecentTimestamp(mostRecent) != PLUS_SUCCESS)
194  {
195  LOG_ERROR("Unable to retrieve most recent timestamp for parameter " << parameter->ParameterName);
196  return PLUS_FAIL;
197  }
198 
199  // Search the retrieved frames to see is this timestamp has already been pulled
200  std::map<double, int>::iterator frameIt = QueriedFramesIndexes.find(mostRecent);
201 
202  // If it hasn't, go get the latest
203  if (frameIt == QueriedFramesIndexes.end())
204  {
205  this->TrackedFrames->Clear();
206  double aTimestamp(UNDEFINED_TIMESTAMP);
207  if (parameter->SourceChannel->GetTrackedFrameList(aTimestamp, this->TrackedFrames, 1) != PLUS_SUCCESS)
208  {
209  LOG_INFO("Failed to get tracked frame list from data collector.");
210  return PLUS_FAIL;
211  }
212  double timestamp = TrackedFrames->GetTrackedFrame(0)->GetTimestamp();
213 
214  // Copy the frame so it isn't lost when the tracked frame list is cleared
215  frame = (*TrackedFrames->GetTrackedFrame(0));
216 
217  // Record the index of this timestamp
218  QueriedFramesIndexes[timestamp] = QueriedFrames.size();
219  QueriedFrames.push_back(&frame);
220  }
221  else
222  {
223  frame = (*QueriedFrames[frameIt->second]);
224  }
225 
226  return PLUS_SUCCESS;
227 }
228 
229 //----------------------------------------------------------------------------
231 {
232  if (this->TessdataDirectory.empty())
233  {
234  if (!vtksys::SystemTools::GetEnv("TESSDATA_PREFIX", this->TessdataDirectory))
235  {
236  this->SetTessdataDirectory(vtkPlusConfig::GetInstance()->GetImagePath("tessdata"));
237  }
238  }
239  std::stringstream ss;
240  ss << "TESSDATA_PREFIX=" << this->TessdataDirectory;
241  vtksys::SystemTools::PutEnv(ss.str());
242  LOG_DEBUG("Using tessdata directory: " << this->TessdataDirectory);
243 
244  this->TesseractAPI = new tesseract::TessBaseAPI();
245  if (this->TesseractAPI->Init(NULL, Language.c_str(), tesseract::OEM_TESSERACT_CUBE_COMBINED) != 0)
246  {
247  LOG_ERROR("Unable to init tesseract library. Cannot perform text recognition.");
248  return PLUS_FAIL;
249  }
250  this->TesseractAPI->SetPageSegMode(tesseract::PSM_SINGLE_LINE);
251 
252  return PLUS_SUCCESS;
253 }
254 
255 //----------------------------------------------------------------------------
257 {
258  delete this->TesseractAPI;
259  this->TesseractAPI = NULL;
260 
262 
263  return PLUS_SUCCESS;
264 }
265 
266 //----------------------------------------------------------------------------
267 PlusStatus vtkPlusVirtualTextRecognizer::ReadConfiguration(vtkXMLDataElement* rootConfigElement)
268 {
270 
271  vtkXMLDataElement* deviceConfig = this->FindThisDeviceElement(rootConfigElement);
272  if (deviceConfig == NULL)
273  {
274  LOG_ERROR("Unable to continue configuration of " << this->GetClassName() << ". Could not find corresponding element.");
275  return PLUS_FAIL;
276  }
277 
278  Superclass::ReadConfiguration(rootConfigElement);
279 
280  if (this->MissingInputGracePeriodSec < TEXT_RECOGNIZER_MISSING_INPUT_DEFAULT)
281  {
282  LOG_WARNING("MissingInputGracePeriodSec must be set to a value > 1s to allow input to arrive and be processed.");
283  this->MissingInputGracePeriodSec = TEXT_RECOGNIZER_MISSING_INPUT_DEFAULT;
284  }
285 
286  this->SetLanguage(DEFAULT_LANGUAGE);
287  XML_READ_CSTRING_ATTRIBUTE_OPTIONAL(Language, deviceConfig);
288  XML_READ_STRING_ATTRIBUTE_OPTIONAL(TessdataDirectory, deviceConfig);
289 
290  XML_FIND_NESTED_ELEMENT_OPTIONAL(screenFields, deviceConfig, PARAMETER_LIST_TAG_NAME);
291 
292  for (int i = 0; i < screenFields->GetNumberOfNestedElements(); ++i)
293  {
294  vtkXMLDataElement* fieldElement = screenFields->GetNestedElement(i);
295 
296  if (STRCASECMP(fieldElement->GetName(), PARAMETER_TAG_NAME) != 0)
297  {
298  continue;
299  }
300 
301  const char* channelName = fieldElement->GetAttribute(PARAMETER_CHANNEL_ATTRIBUTE);
302  vtkPlusChannel* aChannel;
303  if (channelName == NULL || this->GetDataCollector()->GetChannel(aChannel, channelName) != PLUS_SUCCESS)
304  {
305  LOG_ERROR("Cannot build field scanner. Input " << PARAMETER_CHANNEL_ATTRIBUTE << " is not defined or invalid " << PARAMETER_CHANNEL_ATTRIBUTE << " name specified.");
306  continue;
307  }
308 
309  if (fieldElement->GetAttribute(PARAMETER_NAME_ATTRIBUTE) == NULL)
310  {
311  LOG_ERROR("Parameter " << PARAMETER_NAME_ATTRIBUTE << " not defined. Unable to build field scanner.");
312  continue;
313  }
314 
315  int origin[2] = {-1, -1};
316  int size[2] = {-1, -1};
317  fieldElement->GetVectorAttribute(PARAMETER_ORIGIN_ATTRIBUTE, 2, origin);
318  fieldElement->GetVectorAttribute(PARAMETER_SIZE_ATTRIBUTE, 2, size);
319  if (origin[0] < 0 || origin[1] < 0 || size[0] < 0 || size[1] < 0)
320  {
321  LOG_ERROR("Invalid definition for " << PARAMETER_ORIGIN_ATTRIBUTE << " and " << PARAMETER_SIZE_ATTRIBUTE << ". Unable to build field scanner.");
322  continue;
323  }
324 
325  TextFieldParameter* parameter = new TextFieldParameter();
326  parameter->ParameterName = std::string(fieldElement->GetAttribute(PARAMETER_NAME_ATTRIBUTE));
327  parameter->SourceChannel = aChannel;
328  parameter->Origin[0] = origin[0];
329  parameter->Origin[1] = origin[1];
330  parameter->Size[0] = size[0];
331  parameter->Size[1] = size[1];
332  parameter->ReceivedFrame = pixCreate(parameter->Size[0], parameter->Size[1], PARAMETER_DEPTH_BITS);
333  parameter->ScreenRegion = vtkSmartPointer<vtkImageData>::New();
334  parameter->ScreenRegion->SetExtent(0, size[0] - 1, 0, size[1] - 1, 0, 0);
335  parameter->ScreenRegion->AllocateScalars(VTK_UNSIGNED_CHAR, 1); // Black and white images for now
336 
337  this->RecognitionFields[parameter->SourceChannel].push_back(parameter);
338  }
339 
340  return PLUS_SUCCESS;
341 }
342 
343 //----------------------------------------------------------------------------
345 {
346  XML_FIND_DEVICE_ELEMENT_REQUIRED_FOR_WRITING(deviceConfig, rootConfigElement);
347 
348  if (!igsioCommon::IsEqualInsensitive(this->Language, DEFAULT_LANGUAGE))
349  {
350  XML_WRITE_STRING_ATTRIBUTE_IF_NOT_EMPTY(Language, deviceConfig);
351  }
352 
353  XML_FIND_NESTED_ELEMENT_CREATE_IF_MISSING(screenFields, deviceConfig, PARAMETER_LIST_TAG_NAME);
354 
355  for (ChannelFieldListMapIterator it = this->RecognitionFields.begin(); it != this->RecognitionFields.end(); ++it)
356  {
357  for (FieldListIterator fieldIt = it->second.begin(); fieldIt != it->second.end(); ++fieldIt)
358  {
359  TextFieldParameter* parameter = *fieldIt;
360 
361  XML_FIND_NESTED_ELEMENT_CREATE_IF_MISSING(fieldElement, screenFields, PARAMETER_TAG_NAME);
362 
363  fieldElement->SetAttribute(PARAMETER_CHANNEL_ATTRIBUTE, parameter->SourceChannel->GetChannelId());
364  fieldElement->SetAttribute(PARAMETER_NAME_ATTRIBUTE, parameter->ParameterName.c_str());
365  fieldElement->SetVectorAttribute(PARAMETER_ORIGIN_ATTRIBUTE, 2, parameter->Origin.data());
366  fieldElement->SetVectorAttribute(PARAMETER_SIZE_ATTRIBUTE, 2, parameter->Size.data());
367  }
368  }
369 
370  return PLUS_SUCCESS;
371 }
372 
373 //----------------------------------------------------------------------------
375 {
376  if (this->InputChannels.size() < 1)
377  {
378  LOG_ERROR("Screen reader needs at least one input image to analyze. Please add an input channel with video data.");
379  return PLUS_FAIL;
380  }
381 
382  if (!this->InputChannels[0]->HasVideoSource())
383  {
384  LOG_ERROR("Input channel does not have a video source. Need video to analyze.");
385  return PLUS_FAIL;
386  }
387 
388  if (this->OutputChannels.size() != 1)
389  {
390  LOG_ERROR("No output channels defined. Recognizer needs an output channel to send text.");
391  return PLUS_FAIL;
392  }
393 
394  if (this->RecognitionFields.size() < 1)
395  {
396  LOG_ERROR("Screen reader has no fields defined. There's nothing for me to do!");
397  }
398 
399  if (this->OutputChannels.size() > 0)
400  {
401  this->OutputChannel = this->OutputChannels[0];
402  }
403  else
404  {
405  LOG_ERROR("Screen reader requires an output channel.");
406  return PLUS_FAIL;
407  }
408 
409  if (!this->OutputChannel->GetFieldDataEnabled())
410  {
411  LOG_ERROR("Screen reader requires an output channel with at least one field data source defined.");
412  return PLUS_FAIL;
413  }
414 
415  return PLUS_SUCCESS;
416 }
static const int VIRTUAL_DEVICE_FRAME_RATE
const uint32_t * data
Definition: phidget22.h:3971
virtual void PrintSelf(ostream &os, vtkIndent indent) VTK_OVERRIDE
Abstract interface for tracker and video devices.
Definition: vtkPlusDevice.h:60
double * timestamp
Definition: phidget22.h:3432
const char int line
Definition: phidget22.h:2458
#define XML_FIND_DEVICE_ELEMENT_REQUIRED_FOR_WRITING(deviceConfig, rootConfigElement)
ChannelFieldListMap::iterator ChannelFieldListMapIterator
vtkPlusChannel * OutputChannel
Optional output channel to store recognized fields for broadcasting.
igsioStatus PlusStatus
Definition: PlusCommon.h:40
ChannelContainer InputChannels
DataSourceContainerIterator GetFieldDataSourcesStartIterator()
virtual std::string GetDeviceId() const
vtkIGSIOTrackedFrameList * TrackedFrames
for i
double AcquisitionRate
virtual PlusStatus ReadConfiguration(vtkXMLDataElement *)
#define PLUS_FAIL
Definition: PlusCommon.h:43
virtual vtkPlusDataCollector * GetDataCollector()
static vtkPlusConfig * GetInstance()
bool GetFieldDataEnabled() const
virtual PlusStatus WriteConfiguration(vtkXMLDataElement *)
virtual void PrintSelf(ostream &os, vtkIndent indent) VTK_OVERRIDE
virtual PlusStatus ReadConfiguration(vtkXMLDataElement *)
double MissingInputGracePeriodSec
unsigned long FrameNumber
vtkStandardNewMacro(vtkPlusVirtualTextRecognizer)
#define PLUS_SUCCESS
Definition: PlusCommon.h:44
virtual const char * GetClassName()
vtkXMLDataElement * FindThisDeviceElement(vtkXMLDataElement *rootXMLElement)
PlusStatus FindOrQueryFrame(igsioTrackedFrame &frame, std::map< double, int > &queriedFramesIndexes, TextFieldParameter *parameter, std::vector< igsioTrackedFrame * > &queriedFrames)
If a frame has been queried for this input channel, reuse it instead of getting a new one.
DataSourceContainer::iterator DataSourceContainerIterator
int x
Definition: phidget22.h:4265
int int int int ySize
Definition: phidget22.h:4283
void vtkImageDataToPix(igsioTrackedFrame &frame, TextFieldParameter *parameter)
Convert a vtkImage data to leptonica pix format.
bool StartThreadForInternalUpdates
bool HasGracePeriodExpired()
const char ** channelName
Definition: phidget22.h:1307
Contains an optional timestamped circular buffer containing the video images and a number of timestam...
ChannelContainer OutputChannels
Direction vectors of rods y
Definition: algo3.m:15
ChannelFieldListMap RecognitionFields
Map of channels to fields so that we only have to grab an image once from the each source channel.
void ClearConfiguration()
Remove any configuration data.
tesseract::TessBaseAPI * TesseractAPI
Main entry point for the tesseract API.
std::map< vtkPlusChannel *, FieldList > ChannelFieldListMap
DataSourceContainerIterator GetFieldDataSourcesEndIterator()
std::string Language
Language used for detection.