Leviathan  0.8.0.0
Leviathan game engine
Leviathan::MatroskaParser Class Reference

Basic parser for Matroska (.mkv) container for use with GUI::VideoPlayer. More...

#include <MatroskaParser.h>

Classes

struct  BlockInfo
 
struct  BlockSearchInfo
 
struct  BlockSearchResult
 
struct  ClusterInfo
 
struct  ParsedInformation
 
struct  TrackInfo
 

Public Types

enum  TRACK_TYPE {
  TRACK_TYPE::Video = 1, TRACK_TYPE::Audio = 2, TRACK_TYPE::Complex = 3, TRACK_TYPE::Logo = 16,
  TRACK_TYPE::Subtitle = 17, TRACK_TYPE::Buttons = 18, TRACK_TYPE::Control = 32
}
 

Public Member Functions

DLLEXPORT MatroskaParser (const std::string &file)
 
 MatroskaParser (const MatroskaParser &other)
 
MatroskaParser & operator= (const MatroskaParser &other)
 
DLLEXPORT void FindTopLevelElements ()
 
bool Good () const
 
const auto & GetErrorMessage () const
 
const auto & GetHeader () const
 
const auto GetTracks () const
 
const auto GetVideoTrackCount () const
 
const auto GetAudioTrackCount () const
 
DLLEXPORT float GetDurationInSeconds () const
 
DLLEXPORT const TrackInfo & GetFirstVideoTrack () const
 
DLLEXPORT const TrackInfo & GetFirstAudioTrack () const
 
DLLEXPORT std::vector< uint8_t > ReadTrackCodecPrivateData (const TrackInfo &track)
 Reads the codec private data for a track. More...
 
DLLEXPORT void JumpToFirstCluster ()
 Jumps the read position to the first cluster. More...
 
DLLEXPORT std::tuple< const uint8_t *, size_t, BlockInfo > GetNextBlockForTrack (int tracknumber)
 Gets the next block matching track number. More...
 
DLLEXPORT std::tuple< bool, size_t, BlockInfo > PeekNextBlockForTrack (int tracknumber) const
 Peeks at the next block matching track number. More...
 

Static Public Member Functions

static DLLEXPORT uint64_t ReadVariableLengthUnsignedInteger (const std::vector< uint8_t > &data, int length)
 Reads a variable length unsigned integer (in big endian form) into an integer. More...
 
static DLLEXPORT double ReadVariableLengthFloat (const std::vector< uint8_t > &data, int length)
 Reads a variable length float (in big endian form) More...
 
static DLLEXPORT std::vector< std::tuple< const uint8_t *, size_t > > SplitVorbisPrivateSetupData (const uint8_t *codecprivatedata, size_t datalength)
 Splits Vorbis private data into the 3 setup packets. More...
 

Static Public Attributes

static constexpr auto CODEC_TYPE_AV1 = "V_AV1"
 
static constexpr auto CODEC_TYPE_VORBIS = "A_VORBIS"
 
static constexpr auto CODEC_TYPE_OPUS = "A_OPUS"
 
static constexpr float MATROSKA_DURATION_TO_SECONDS = 0.001f
 

Protected Member Functions

void SetError (const std::string &error)
 
DLLEXPORT bool ParseHeaderValues (int headerSizeLeft)
 
DLLEXPORT void HandleInfoElement (const EBMLElement &element)
 
DLLEXPORT void HandleTracksElement (const EBMLElement &element)
 
DLLEXPORT void HandleTrackEntryElement (const EBMLElement &element)
 
DLLEXPORT void HandleClusterElement (const EBMLElement &element)
 
DLLEXPORT bool _UpdateFindClusterInfo (BlockSearchInfo &info) const
 
DLLEXPORT bool _FindNextBlock (int tracknumber, BlockSearchResult &result) const
 
DLLEXPORT bool _SearchForNextBlock (int tracknumber, BlockSearchResult &result) const
 

Static Protected Member Functions

static DLLEXPORT uint64_t ApplyRelativeTimecode (uint64_t base, int16_t relative)
 

Detailed Description

Basic parser for Matroska (.mkv) container for use with GUI::VideoPlayer.

This parser is written based on the following documents: https://matroska.org/technical/diagram/index.html and https://matroska.org/technical/specs/index.html

Definition at line 20 of file MatroskaParser.h.

Member Enumeration Documentation

◆ TRACK_TYPE

Enumerator
Video 
Audio 
Complex 
Logo 
Subtitle 
Buttons 
Control 

Definition at line 50 of file MatroskaParser.h.

50  {
51  Video = 1,
52  Audio = 2,
53  Complex = 3,
54  Logo = 16,
55  Subtitle = 17,
56  Buttons = 18,
57  Control = 32,
58  };

Constructor & Destructor Documentation

◆ MatroskaParser() [1/2]

DLLEXPORT MatroskaParser::MatroskaParser ( const std::string &  file)
Note
Parses the matroska header. Check Good() after construction to see if the file was valid

Definition at line 377 of file MatroskaParser.cpp.

377  :
378  File(file), Reader(File, std::ios::binary)
379 {
380  if(!Reader.good()) {
381  SetError("File can't be read");
382  return;
383  }
384 
385  // EBML header
386  uint32_t magic;
387  Reader.read(reinterpret_cast<char*>(&magic), sizeof(magic));
388  magic = boost::endian::big_to_native(magic);
389 
390  if(magic != 0x1A45DFA3) {
391  SetError("wrong magic, got: " + Convert::ToHexadecimalString(magic) +
392  " expected: " + Convert::ToHexadecimalString(EBML_MAGIC));
393  return;
394  }
395 
396  EBMLLengthValue headerLength(Reader);
397 
398  if(!Reader.good()) {
399  SetError("reading header length failed");
400  return;
401  }
402 
403  LOG_INFO("Reading matroska file: " + file +
404  ", magic is correct, header length: " + std::to_string(headerLength.Value));
405 
406  Parsed.FirstEBMLElementOffset =
407  sizeof(magic) + headerLength.ByteLength + headerLength.Value;
408 
409  // Parse the rest of the header. This sets any errors it encounters and returns false if
410  // the file is unsupported
411  if(!ParseHeaderValues(headerLength.Value)) {
412  Error = true;
413  return;
414  }
415 
416  if(static_cast<size_t>(Reader.tellg()) != Parsed.FirstEBMLElementOffset) {
417  SetError("reader ended up at the wrong position, it should have ended up as the first "
418  "segment start");
419  }
420 
421  // Look for all top level elements for later use
422  FindTopLevelElements();
423 }
#define LOG_INFO(x)
Definition: Define.h:90
void SetError(const std::string &error)
DLLEXPORT bool ParseHeaderValues(int headerSizeLeft)
DLLEXPORT void FindTopLevelElements()
constexpr uint32_t EBML_MAGIC
static std::string ToHexadecimalString(const T &val)
Definition: Convert.h:93
unsigned int uint32_t
Definition: core.h:40
Decodes a length encoded in EBML.

◆ MatroskaParser() [2/2]

Leviathan::MatroskaParser::MatroskaParser ( const MatroskaParser &  other)
inline

Definition at line 133 of file MatroskaParser.h.

133  :
134  Error(other.Error), ErrorMessage(other.ErrorMessage), Parsed(other.Parsed),
135  ClusterBlockIterator(other.ClusterBlockIterator), File(other.File)
136  {
137  Reader.open(File, std::ios::binary);
138  Reader.seekg(other.Reader.tellg());
139  }

Member Function Documentation

◆ _FindNextBlock()

DLLEXPORT bool MatroskaParser::_FindNextBlock ( int  tracknumber,
BlockSearchResult &  result 
) const
protected

If this finds a block it's guaranteed that Reader is positioned at the start of data in case the caller wants to read the block data

Definition at line 946 of file MatroskaParser.cpp.

947 {
948  if(!_UpdateFindClusterInfo(result.Search))
949  return false;
950 
951  Reader.seekg(*result.Search.NextReadPos);
952 
953  if(Error || !Reader.good()) {
954  LOG_ERROR("MatroskaParser: start search pos is invalid or error is set");
955  return false;
956  }
957 
958  bool working = true;
959 
960  while(working) {
961  EBMLElement currentElement(Reader);
962 
963  if(!Reader.good()) {
964  LOG_ERROR("MatroskaParser: element ended while parsing its header");
965  return false;
966  }
967 
968  const auto currentElementEnd = currentElement.DataStart + currentElement.Length.Value;
969 
970  bool clusterEnded = false;
971 
972  // Don't jump past the end of the current cluster (without properly going to the
973  // next cluster)
974  auto setNextReadPos = [&]() {
975  if(currentElementEnd >= *result.Search.CurrentClusterEnd) {
976  // Needs to jump to next cluster
977  clusterEnded = true;
978 
979  if(result.Search.NextCluster) {
980  result.Search.NextReadPos = *result.Search.NextCluster;
981  } else {
982  // No more clusters
983  result.Search.NextReadPos.reset();
984  }
985  } else {
986  result.Search.NextReadPos = currentElementEnd;
987  }
988  };
989 
990  switch(currentElement.Identifier.Value) {
991  // The current cluster already contains the time code
992  // TODO: that is not optimal if there are a ton of clusters (ie. longer videos are
993  // attempted to be played)
994  // case ELEMENT_TYPE_TIMECODE:
995  case ELEMENT_TYPE_SIMPLE_BLOCK: {
996  ClusterBlockHeader foundBlock(Reader);
997 
998  if(!Reader.good()) {
999  LOG_ERROR("MatroskaParser:: data ended while decoding a cluster block header");
1000  working = false;
1001  break;
1002  }
1003 
1004  if(static_cast<int>(foundBlock.TrackIdentifier.Value) == tracknumber) {
1005 
1006  result.BlockElement = const_cast<const EBMLElement&>(currentElement);
1007  result.FoundBlock = const_cast<const ClusterBlockHeader&>(foundBlock);
1008 
1009  result.BlockData = BlockInfo{ApplyRelativeTimecode(
1010  result.Search.ClusterTimeCode, result.FoundBlock->RelativeTimecode)};
1011 
1012  // We need to set the read pos for the next block in case our caller needs it
1013  setNextReadPos();
1014 
1015  return true;
1016  }
1017  }
1018  // Else let the default case handle jumping over the current data as we didn't want
1019  // this block
1020  default:
1021  // Unknown data, we need to jump past
1022  setNextReadPos();
1023 
1024  // If the jump is past the end of the current cluster we need to update which
1025  // cluster we are in
1026  if(clusterEnded) {
1027  if(!_UpdateFindClusterInfo(result.Search)) {
1028  // Ran out of clusters
1029  working = false;
1030  break;
1031  }
1032  }
1033 
1034  Reader.seekg(*result.Search.NextReadPos);
1035  }
1036  }
1037 
1038  return false;
1039 }
static DLLEXPORT uint64_t ApplyRelativeTimecode(uint64_t base, int16_t relative)
#define LOG_ERROR(x)
Definition: Define.h:92
A basic EBML element.
A block inside a cluster.
DLLEXPORT bool _UpdateFindClusterInfo(BlockSearchInfo &info) const
constexpr uint64_t ELEMENT_TYPE_SIMPLE_BLOCK

◆ _SearchForNextBlock()

DLLEXPORT bool MatroskaParser::_SearchForNextBlock ( int  tracknumber,
BlockSearchResult &  result 
) const
protected

Definition at line 1041 of file MatroskaParser.cpp.

1043 {
1044  if(!ClusterBlockIterator || ClusterBlockIterator->LastBlockRead)
1045  return false;
1046 
1047  result.Search.NextReadPos = ClusterBlockIterator->NextReadPos;
1048 
1049  if(!_FindNextBlock(tracknumber, result) || !result.FoundBlock) {
1050  // Out of data
1051  return false;
1052  }
1053 
1054  return true;
1055 }
DLLEXPORT bool _FindNextBlock(int tracknumber, BlockSearchResult &result) const

◆ _UpdateFindClusterInfo()

DLLEXPORT bool MatroskaParser::_UpdateFindClusterInfo ( BlockSearchInfo &  info) const
protected

Definition at line 903 of file MatroskaParser.cpp.

904 {
905  if(Error || !info.NextReadPos) {
906  return false;
907  }
908 
909  info.CurrentClusterEnd.reset();
910  info.NextCluster.reset();
911 
912 
913  // TODO: this loop is also not optimal if there are a ton of clusters (could be
914  // replaced with a binary search or caching)
915  for(size_t i = Parsed.Clusters.size() - 1;; --i) {
916  const auto& cluster = Parsed.Clusters[i];
917 
918  if(cluster.DataStart <= *info.NextReadPos) {
919  // In this cluster
920  info.CurrentClusterEnd = cluster.DataStart + cluster.Lenght;
921  info.ClusterTimeCode = cluster.Timecode;
922 
923  if(i + 1 < Parsed.Clusters.size()) {
924  // There is a next cluster
925  info.NextCluster = Parsed.Clusters[i + 1].DataStart;
926  }
927  break;
928  }
929 
930  if(i == 0) {
931  LOG_ERROR("MatroskaParser: could not find which cluster current position is in: " +
932  std::to_string(*info.NextReadPos));
933  return false;
934  }
935  }
936 
937  if(!info.CurrentClusterEnd) {
938  LOG_ERROR("MatroskaParser: could not find which cluster current "
939  "read position is in");
940  return false;
941  }
942 
943  return true;
944 }
#define LOG_ERROR(x)
Definition: Define.h:92

◆ ApplyRelativeTimecode()

DLLEXPORT uint64_t MatroskaParser::ApplyRelativeTimecode ( uint64_t  base,
int16_t  relative 
)
static protected

Definition at line 1096 of file MatroskaParser.cpp.

1097 {
1098  // Make sure not to add too big negative number to cause a time point wrap to a huge value
1099  if(relative < 0) {
1100  if(base >= static_cast<unsigned>(-1 * relative)) {
1101  return base + relative;
1102  } else {
1103  return 0;
1104  }
1105  }
1106 
1107  return base + relative;
1108 }

◆ FindTopLevelElements()

DLLEXPORT void MatroskaParser::FindTopLevelElements ( )

Definition at line 524 of file MatroskaParser.cpp.

525 {
526  Reader.seekg(Parsed.FirstEBMLElementOffset);
527 
528  if(!Reader.good()) {
529  SetError("Parsed.FirstEBMLElementOffset is invalid offset");
530  return;
531  }
532 
533  EBMLElement segment(Reader);
534 
535  if(!Reader.good()) {
536  SetError("Segment ended while parsing its header");
537  return;
538  }
539 
540  if(static_cast<size_t>(Reader.tellg()) != segment.DataStart) {
541  SetError("Segment start pos mismatched with data start");
542  return;
543  }
544 
545  if(segment.Identifier.Value != ELEMENT_TYPE_SEGMENT) {
546  SetError("unexpected element identifier where Segment should begin");
547  return;
548  }
549 
550  int result =
551  segment.ReadChildElements(Reader, [&](const EBMLElement& element, auto readdata) {
552  switch(element.Identifier.Value) {
553  case ELEMENT_TYPE_INFO: HandleInfoElement(element); break;
554  case ELEMENT_TYPE_TRACKS: HandleTracksElement(element); break;
555  case ELEMENT_TYPE_CLUSTER:
556  HandleClusterElement(element);
557  break;
558  // Not useful elements
559  case ELEMENT_TYPE_CUES:
560  case ELEMENT_TYPE_TAGS:
561  case ELEMENT_TYPE_VOID:
562  case ELEMENT_TYPE_SEEK_HEAD:
563  default: break;
564  }
565 
566  return true;
567  });
568 
569  if(result < 0) {
570  SetError("reading file data failed while parsing EBML element, at pos: " +
571  std::to_string(Reader.tellg()));
572  return;
573  }
574 
575  if(Parsed.Duration < 0.f) {
576  SetError("Failed to find info element with duration");
577  }
578 }
EBMLLIdentifierValue Identifier
A basic EBML element.
void SetError(const std::string &error)
constexpr uint64_t ELEMENT_TYPE_SEGMENT
uint64_t Value
The contents of this value.

◆ GetAudioTrackCount()

const auto Leviathan::MatroskaParser::GetAudioTrackCount ( ) const
inline

Definition at line 180 of file MatroskaParser.h.

181  {
182  return Parsed.AudioTrackCount;
183  }

◆ GetDurationInSeconds()

DLLEXPORT float MatroskaParser::GetDurationInSeconds ( ) const
Returns
The duration in seconds or -1.f if that info is unavailable

Definition at line 798 of file MatroskaParser.cpp.

799 {
800  if(Parsed.Duration < 0)
801  return -1.f;
802  return Parsed.Duration * MATROSKA_DURATION_TO_SECONDS;
803 }
static constexpr float MATROSKA_DURATION_TO_SECONDS

◆ GetErrorMessage()

const auto& Leviathan::MatroskaParser::GetErrorMessage ( ) const
inline

Definition at line 160 of file MatroskaParser.h.

161  {
162  return ErrorMessage;
163  }

◆ GetFirstAudioTrack()

DLLEXPORT const MatroskaParser::TrackInfo & MatroskaParser::GetFirstAudioTrack ( ) const

Definition at line 815 of file MatroskaParser.cpp.

816 {
817  for(const auto& track : Parsed.Tracks) {
818  if(track.TrackType == TRACK_TYPE::Audio)
819  return track;
820  }
821 
822  throw Leviathan::InvalidState("no audio tracks exist");
823 }

◆ GetFirstVideoTrack()

DLLEXPORT const MatroskaParser::TrackInfo & MatroskaParser::GetFirstVideoTrack ( ) const
Returns
The first video track or throws an exception

Definition at line 805 of file MatroskaParser.cpp.

806 {
807  for(const auto& track : Parsed.Tracks) {
808  if(track.TrackType == TRACK_TYPE::Video)
809  return track;
810  }
811 
812  throw Leviathan::InvalidState("no video tracks exist");
813 }

◆ GetHeader()

const auto& Leviathan::MatroskaParser::GetHeader ( ) const
inline

Definition at line 165 of file MatroskaParser.h.

166  {
167  return Parsed;
168  }

◆ GetNextBlockForTrack()

DLLEXPORT std::tuple< const uint8_t *, size_t, MatroskaParser::BlockInfo > MatroskaParser::GetNextBlockForTrack ( int  tracknumber)

Gets the next block matching track number.

If there is no next block this returns (nullptr, 0, BlockInfo{})

Definition at line 857 of file MatroskaParser.cpp.

858 {
859  BlockSearchResult result;
860  if(!_SearchForNextBlock(tracknumber, result)) {
861  // Out of data
862  ClusterBlockIterator.reset();
863  return std::make_tuple(nullptr, 0, BlockInfo{});
864  }
865 
866  // Found a block, read its data to pass to our caller
867  const size_t dataBegin = Reader.tellg();
868 
869  const auto currentElementEnd =
870  result.BlockElement->DataStart + result.BlockElement->Length.Value;
871 
872  const auto nonHeaderBytes = currentElementEnd - dataBegin;
873  ClusterBlockIterator->DataBuffer.resize(nonHeaderBytes);
874 
875  Reader.read(reinterpret_cast<char*>(ClusterBlockIterator->DataBuffer.data()),
876  ClusterBlockIterator->DataBuffer.size());
877 
878  // Set the next position
879  if(result.Search.NextReadPos) {
880  ClusterBlockIterator->NextReadPos = *result.Search.NextReadPos;
881  } else {
882  // No more data
883  ClusterBlockIterator->LastBlockRead = true;
884  }
885 
886  // Return the block data
887  return std::make_tuple(ClusterBlockIterator->DataBuffer.data(),
888  ClusterBlockIterator->DataBuffer.size(), result.BlockData);
889 }
DLLEXPORT bool _SearchForNextBlock(int tracknumber, BlockSearchResult &result) const

◆ GetTracks()

const auto Leviathan::MatroskaParser::GetTracks ( ) const
inline

Definition at line 170 of file MatroskaParser.h.

171  {
172  return Parsed.Tracks;
173  }

◆ GetVideoTrackCount()

const auto Leviathan::MatroskaParser::GetVideoTrackCount ( ) const
inline

Definition at line 175 of file MatroskaParser.h.

176  {
177  return Parsed.VideoTrackCount;
178  }

◆ Good()

bool Leviathan::MatroskaParser::Good ( ) const
inline

Definition at line 155 of file MatroskaParser.h.

156  {
157  return !Error && Reader.good();
158  }

◆ HandleClusterElement()

DLLEXPORT void MatroskaParser::HandleClusterElement ( const EBMLElement &  element)
protected

Definition at line 776 of file MatroskaParser.cpp.

777 {
778  int result =
779  element.ReadChildElements(Reader, [&](const EBMLElement& childElement, auto readdata) {
780  switch(childElement.Identifier.Value) {
781  case ELEMENT_TYPE_TIMECODE:
782  // Just the time code is grabbed and this cluster info is then stored
783  const auto time =
784  ReadVariableLengthUnsignedInteger(readdata(), childElement.Length.Value);
785 
786  Parsed.Clusters.push_back(
787  ClusterInfo{element.DataStart, element.Length.Value, time});
788  return false;
789  }
790  return true;
791  });
792 
793  if(result < 0) {
794  SetError("reading a cluster element contents failed");
795  }
796 }
EBMLLIdentifierValue Identifier
A basic EBML element.
void SetError(const std::string &error)
int ReadChildElements(T &stream, Callback callback) const
uint64_t Value
The contents of this value.

◆ HandleInfoElement()

DLLEXPORT void MatroskaParser::HandleInfoElement ( const EBMLElement &  element)
protected

Definition at line 580 of file MatroskaParser.cpp.

581 {
582  int result =
583  element.ReadChildElements(Reader, [&](const EBMLElement& element, auto readdata) {
584  switch(element.Identifier.Value) {
585  case ELEMENT_TYPE_TIMECODE_SCALE:
586  Parsed.TimecodeScale =
587  ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value);
588  break;
589  case ELEMENT_TYPE_MUXIN_APP:
590  Parsed.MuxinApp = std::string(
591  reinterpret_cast<char*>(readdata().data()), element.Length.Value);
592  break;
593  case ELEMENT_TYPE_WRITING_APP:
594  Parsed.WritingApp = std::string(
595  reinterpret_cast<char*>(readdata().data()), element.Length.Value);
596  break;
597  case ELEMENT_TYPE_DURATION:
598  Parsed.Duration = ReadVariableLengthFloat(readdata(), element.Length.Value);
599  break;
600  }
601 
602  return true;
603  });
604 
605  if(result < 0) {
606  SetError("reading info element contents failed");
607  }
608 }
EBMLLIdentifierValue Identifier
A basic EBML element.
void SetError(const std::string &error)
int ReadChildElements(T &stream, Callback callback) const
uint64_t Value
The contents of this value.

◆ HandleTrackEntryElement()

DLLEXPORT void MatroskaParser::HandleTrackEntryElement ( const EBMLElement &  element)
protected

Definition at line 626 of file MatroskaParser.cpp.

627 {
628  TrackInfo track;
629  bool error = false;
630 
631  int result = element.ReadChildElements(Reader, [&](const EBMLElement& element,
632  auto readdata) {
633  switch(element.Identifier.Value) {
634  case ELEMENT_TYPE_TRACK_NUMBER:
635  track.TrackNumber =
636  ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value);
637  break;
638  case ELEMENT_TYPE_TRACK_UID:
639  track.TrackUID =
640  ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value);
641  break;
642  case ELEMENT_TYPE_FLAG_LACING:
643  track.Lacing =
644  ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value) != 0;
645  break;
646  case ELEMENT_TYPE_LANGUAGE:
647  track.Language =
648  std::string(reinterpret_cast<char*>(readdata().data()), element.Length.Value);
649  break;
650  case ELEMENT_TYPE_CODEC_ID:
651  track.CodecID =
652  std::string(reinterpret_cast<char*>(readdata().data()), element.Length.Value);
653  break;
654  case ELEMENT_TYPE_TRACK_TYPE: {
655  int type = ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value);
656 
657  switch(type) {
658  case EBML_TRACK_TYPE_VIDEO: track.TrackType = TRACK_TYPE::Video; break;
659  case EBML_TRACK_TYPE_AUDIO: track.TrackType = TRACK_TYPE::Audio; break;
660  case EBML_TRACK_TYPE_COMPLEX: track.TrackType = TRACK_TYPE::Complex; break;
661  case EBML_TRACK_TYPE_LOGO: track.TrackType = TRACK_TYPE::Logo; break;
662  case EBML_TRACK_TYPE_SUBTITLE: track.TrackType = TRACK_TYPE::Subtitle; break;
663  case EBML_TRACK_TYPE_BUTTONS: track.TrackType = TRACK_TYPE::Buttons; break;
664  case EBML_TRACK_TYPE_CONTROL: track.TrackType = TRACK_TYPE::Control; break;
665  default:
666  error = true;
667  SetError("unknown track type: " + std::to_string(type));
668  return false;
669  }
670  }
671  case ELEMENT_TYPE_DEFAULT_DURATION:
672  track.DefaultDuration =
673  ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value);
674  break;
675  // These two have important child elements
676  case ELEMENT_TYPE_VIDEO: {
677  TrackInfo::Video video;
678  int result = element.ReadChildElements(Reader, [&](const EBMLElement& element,
679  auto readdata) {
680  switch(element.Identifier.Value) {
681  case ELEMENT_TYPE_PIXEL_WIDTH:
682  video.PixelWidth =
683  ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value);
684  break;
685  case ELEMENT_TYPE_PIXEL_HEIGHT:
686  video.PixelHeight =
687  ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value);
688  break;
689  case ELEMENT_TYPE_DISPLAY_UNIT:
690  video.DisplayUnit =
691  ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value);
692  break;
693  }
694  return true;
695  });
696 
697  if(result < 1) {
698  SetError("parsing Video element failed");
699  error = true;
700  return false;
701  }
702 
703  track.TrackTypeData = video;
704  break;
705  }
706  case ELEMENT_TYPE_AUDIO: {
707  TrackInfo::Audio audio;
708  int result = element.ReadChildElements(Reader, [&](const EBMLElement& element,
709  auto readdata) {
710  switch(element.Identifier.Value) {
711  case ELEMENT_TYPE_CHANNELS:
712  audio.Channels =
713  ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value);
714  break;
715  case ELEMENT_TYPE_SAMPLING_FREQUENCY:
716  audio.SamplingFrequency =
717  ReadVariableLengthFloat(readdata(), element.Length.Value);
718  break;
719  case ELEMENT_TYPE_BIT_DEPTH:
720  audio.BitDepth =
721  ReadVariableLengthUnsignedInteger(readdata(), element.Length.Value);
722  break;
723  }
724  return true;
725  });
726 
727  if(result < 1) {
728  SetError("parsing Audio element failed");
729  error = true;
730  return false;
731  }
732 
733  track.TrackTypeData = audio;
734  break;
735  }
736  case ELEMENT_TYPE_CODEC_PRIVATE:
737  track.CodecPrivateOffset = element.DataStart;
738  track.CodecPrivateLength = element.Length.Value;
739  break;
740  }
741 
742  return true;
743  });
744 
745  // Error checking before adding this track to avoid adding invalid tracks
746  if(error)
747  return;
748 
749  if(result < 1) {
750  SetError("reading a track entry element contents failed (or it is empty)");
751  return;
752  }
753 
754  if(track.TrackType == TRACK_TYPE::Video &&
755  !std::holds_alternative<TrackInfo::Video>(track.TrackTypeData)) {
756  SetError("parsed a track of type video but it didn't contain a Video element");
757  return;
758  }
759 
760  if(track.TrackType == TRACK_TYPE::Audio &&
761  !std::holds_alternative<TrackInfo::Audio>(track.TrackTypeData)) {
762  SetError("parsed a track of type audio but it didn't contain a Audio element");
763  return;
764  }
765 
766  // Add the parsed track to the list of tracks and update the easy to read variables for how
767  // many tracks of each type we have
768  Parsed.Tracks.push_back(track);
769 
770  if(track.TrackType == TRACK_TYPE::Video)
771  ++Parsed.VideoTrackCount;
772  if(track.TrackType == TRACK_TYPE::Audio)
773  ++Parsed.AudioTrackCount;
774 }
EBMLLIdentifierValue Identifier
A basic EBML element.
void SetError(const std::string &error)
static DLLEXPORT uint64_t ReadVariableLengthUnsignedInteger(const std::vector< uint8_t > &data, int length)
Reads a variable length unsigned integer (in big endian form) into an integer.
constexpr uint64_t ELEMENT_TYPE_DEFAULT_DURATION
constexpr uint64_t ELEMENT_TYPE_AUDIO
constexpr uint64_t ELEMENT_TYPE_VIDEO
constexpr uint64_t ELEMENT_TYPE_CODEC_PRIVATE
int ReadChildElements(T &stream, Callback callback) const
uint64_t Value
The contents of this value.

◆ HandleTracksElement()

DLLEXPORT void MatroskaParser::HandleTracksElement ( const EBMLElement &  element)
protected

Definition at line 610 of file MatroskaParser.cpp.

611 {
612  int result =
613  element.ReadChildElements(Reader, [&](const EBMLElement& element, auto readdata) {
614  switch(element.Identifier.Value) {
615  case ELEMENT_TYPE_TRACK_ENTRY: HandleTrackEntryElement(element); break;
616  }
617 
618  return true;
619  });
620 
621  if(result < 0) {
622  SetError("reading tracks element contents failed");
623  }
624 }
EBMLLIdentifierValue Identifier
A basic EBML element.
void SetError(const std::string &error)
int ReadChildElements(T &stream, Callback callback) const
uint64_t Value
The contents of this value.

◆ JumpToFirstCluster()

DLLEXPORT void MatroskaParser::JumpToFirstCluster ( )

Jumps the read position to the first cluster.

Definition at line 846 of file MatroskaParser.cpp.

847 {
848  if(Parsed.Clusters.empty()) {
849  ClusterBlockIterator.reset();
850  return;
851  }
852 
853  ClusterBlockIterator = BlockIteratorInfo(Parsed.Clusters.front().DataStart);
854 }

◆ operator=()

MatroskaParser& Leviathan::MatroskaParser::operator= ( const MatroskaParser &  other)
inline

Definition at line 141 of file MatroskaParser.h.

142  {
143  Error = other.Error;
144  ErrorMessage = other.ErrorMessage;
145  File = other.File;
146  Parsed = other.Parsed;
147  ClusterBlockIterator = other.ClusterBlockIterator;
148  Reader.open(File, std::ios::binary);
149  Reader.seekg(other.Reader.tellg());
150  return *this;
151  }

◆ ParseHeaderValues()

DLLEXPORT bool MatroskaParser::ParseHeaderValues ( int  headerSizeLeft)
protected
Returns
True if can be parsed
Note
Reader must be at the start of the list of header data

Definition at line 425 of file MatroskaParser.cpp.

426 {
427  std::vector<uint8_t> dataBuffer;
428 
429  while(headerSizeLeft > 0) {
430  EBMLLIdentifierValue valueIdentifier(Reader);
431 
432  if(!Reader.good()) {
433  SetError("data ended while reading a header identifier");
434  return false;
435  }
436 
437  EBMLLengthValue valueLength(Reader);
438 
439  if(!Reader.good()) {
440  SetError("data ended while reading a header variable length");
441  return false;
442  }
443 
444  // Read the data
445  // This shouldn't shrink automatically
446  dataBuffer.resize(valueLength.Value);
447 
448  Reader.read(reinterpret_cast<char*>(dataBuffer.data()), dataBuffer.size());
449 
450  if(!Reader.good()) {
451  SetError("data ended while reading a header variable's data of length: " +
452  std::to_string(dataBuffer.size()));
453  return false;
454  }
455 
456  // Decrement the left number of bytes in the header by the number of bytes just
457  // processed
458  headerSizeLeft -=
459  valueIdentifier.ByteLength + valueLength.ByteLength + valueLength.Value;
460 
461  // Handle variable type if it is known
462  switch(valueIdentifier.Value) {
463  case EBML_FIELD_IDENTIFIER_VERSION:
464  Parsed.EBMLVersion =
465  ReadVariableLengthUnsignedInteger(dataBuffer, valueLength.Value);
466  break;
467  case EBML_FIELD_IDENTIFIER_READ_VERSION:
468  Parsed.EBMLReadVersion =
469  ReadVariableLengthUnsignedInteger(dataBuffer, valueLength.Value);
470  break;
471  case EBML_FIELD_IDENTIFIER_MAX_ID_LENGTH:
472  Parsed.EBMLMaxIDLength =
473  ReadVariableLengthUnsignedInteger(dataBuffer, valueLength.Value);
474  break;
475  case EBML_FIELD_IDENTIFIER_MAX_SIZE_LENGTH:
476  Parsed.EBMLMaxSizeLength =
477  ReadVariableLengthUnsignedInteger(dataBuffer, valueLength.Value);
478  break;
479  case EBML_FIELD_IDENTIFIER_DOCTYPE:
480  Parsed.DocType =
481  std::string(reinterpret_cast<char*>(dataBuffer.data()), valueLength.Value);
482  break;
483  case EBML_FIELD_IDENTIFIER_DOCTYPE_VERSION:
484  Parsed.DocTypeVersion =
485  ReadVariableLengthUnsignedInteger(dataBuffer, valueLength.Value);
486  break;
487  case EBML_FIELD_IDENTIFIER_DOCTYPE_READ_VERSION:
488  Parsed.DocTypeReadVersion =
489  ReadVariableLengthUnsignedInteger(dataBuffer, valueLength.Value);
490  break;
491  }
492  }
493 
494  // Check the header info for whether we can understand the file or not
495  if(Parsed.EBMLVersion != 1) {
496  SetError("unknown EBML version: " + std::to_string(Parsed.EBMLVersion));
497  return false;
498  }
499 
500  if(Parsed.EBMLReadVersion != 1) {
501  SetError("unknown EBML read version: " + std::to_string(Parsed.EBMLReadVersion));
502  return false;
503  }
504 
505  if(Parsed.DocType != "matroska") {
506  SetError("unknown doctype: " + Parsed.DocType);
507  return false;
508  }
509 
510  // This parser is written for version 4 but assume it works just as fine for lower versions
511  if(Parsed.DocTypeVersion > 4 || Parsed.DocTypeVersion < 1) {
512  SetError("unknown doctype version: " + std::to_string(Parsed.DocTypeVersion));
513  return false;
514  }
515 
516  if(Parsed.DocTypeReadVersion != 2) {
517  SetError("unknown doctype read version: " + std::to_string(Parsed.DocTypeReadVersion));
518  return false;
519  }
520 
521  return true;
522 }
constexpr uint64_t EBML_FIELD_IDENTIFIER_DOCTYPE
constexpr uint64_t EBML_FIELD_IDENTIFIER_MAX_SIZE_LENGTH
constexpr uint64_t EBML_FIELD_IDENTIFIER_DOCTYPE_VERSION
void SetError(const std::string &error)
static DLLEXPORT uint64_t ReadVariableLengthUnsignedInteger(const std::vector< uint8_t > &data, int length)
Reads a variable length unsigned integer (in big endian form) into an integer.
constexpr uint64_t EBML_FIELD_IDENTIFIER_DOCTYPE_READ_VERSION
constexpr uint64_t EBML_FIELD_IDENTIFIER_MAX_ID_LENGTH
Decodes an identifier encoded in EBML.
constexpr uint64_t EBML_FIELD_IDENTIFIER_VERSION
constexpr uint64_t EBML_FIELD_IDENTIFIER_READ_VERSION
Decodes a length encoded in EBML.

◆ PeekNextBlockForTrack()

DLLEXPORT std::tuple< bool, size_t, MatroskaParser::BlockInfo > MatroskaParser::PeekNextBlockForTrack ( int  tracknumber) const

Peeks at the next block matching track number.

If there is no next block this returns (false, 0, BlockInfo{})

Definition at line 892 of file MatroskaParser.cpp.

893 {
894  BlockSearchResult result;
895  if(!_SearchForNextBlock(tracknumber, result)) {
896  // No data found
897  return std::make_tuple(false, 0, BlockInfo{});
898  }
899 
900  return std::make_tuple(true, result.BlockElement->Length.Value, result.BlockData);
901 }
DLLEXPORT bool _SearchForNextBlock(int tracknumber, BlockSearchResult &result) const

◆ ReadTrackCodecPrivateData()

DLLEXPORT std::vector< uint8_t > MatroskaParser::ReadTrackCodecPrivateData ( const TrackInfo &  track)

Reads the codec private data for a track.

Definition at line 825 of file MatroskaParser.cpp.

827 {
828  if(!track.CodecPrivateOffset || !track.CodecPrivateLength || !Good())
829  return {};
830 
831  std::vector<uint8_t> result;
832  result.resize(*track.CodecPrivateLength);
833 
834  Reader.seekg(*track.CodecPrivateOffset);
835 
836  Reader.read(reinterpret_cast<char*>(result.data()), result.size());
837 
838  if(!Reader.good()) {
839  SetError("data ended while reading track codec private data");
840  return {};
841  }
842 
843  return result;
844 }
void SetError(const std::string &error)

◆ ReadVariableLengthFloat()

DLLEXPORT double MatroskaParser::ReadVariableLengthFloat ( const std::vector< uint8_t > &  data,
int  length 
)
static

Reads a variable length float (in big endian form)

Parameters
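length: The length in bytes to read, must be > 0 && < data.size(). Only lengths of 4 (float) and 8 (double) are decoded; any other length logs an error and returns -1.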

Definition at line 1069 of file MatroskaParser.cpp.

1071 {
1072  if(length == 4) {
1073  float result;
1074 
1075  uint32_t tmp = ReadVariableLengthUnsignedInteger(data, length);
1076  static_assert(sizeof(tmp) == sizeof(result), "float size mismatch on integer type");
1077 
1078  std::memcpy(&result, &tmp, sizeof(result));
1079  return result;
1080 
1081  } else if(length == 8) {
1082  double result;
1083 
1084  uint64_t tmp = ReadVariableLengthUnsignedInteger(data, length);
1085  static_assert(sizeof(tmp) == sizeof(result), "float size mismatch on integer type");
1086 
1087  std::memcpy(&result, &tmp, sizeof(result));
1088  return result;
1089  }
1090 
1091  LOG_ERROR("MatroskaParser: ReadVariableLengthFloat: cannot handle float of length: " +
1092  std::to_string(length));
1093  return -1.f;
1094 }
#define LOG_ERROR(x)
Definition: Define.h:92
static DLLEXPORT uint64_t ReadVariableLengthUnsignedInteger(const std::vector< uint8_t > &data, int length)
Reads a variable length unsigned integer (in big endian form) into an integer.
unsigned int uint32_t
Definition: core.h:40

◆ ReadVariableLengthUnsignedInteger()

DLLEXPORT uint64_t MatroskaParser::ReadVariableLengthUnsignedInteger ( const std::vector< uint8_t > &  data,
int  length 
)
static

Reads a variable length unsigned integer (in big endian form) into an integer.

Parameters
length: The length in bytes to read, must be > 0 && < data.size()

Definition at line 1057 of file MatroskaParser.cpp.

1059 {
1060  uint64_t result = 0;
1061 
1062  for(int i = 0; i < length && i < static_cast<int>(data.size()); ++i) {
1063  result += static_cast<uint64_t>(data[i]) << (length - 1 - i) * BITS_IN_BYTE;
1064  }
1065 
1066  return result;
1067 }
constexpr auto BITS_IN_BYTE
Definition: Define.h:47

◆ SetError()

void Leviathan::MatroskaParser::SetError ( const std::string &  error)
inlineprotected

Definition at line 228 of file MatroskaParser.h.

229  {
230  Error = true;
231  if(ErrorMessage.empty()) {
232  ErrorMessage = error;
233  } else {
234  ErrorMessage += "; " + error;
235  }
236  }

◆ SplitVorbisPrivateSetupData()

DLLEXPORT std::vector< std::tuple< const uint8_t *, size_t > > MatroskaParser::SplitVorbisPrivateSetupData ( const uint8_t *  codecprivatedata,
size_t  datalength 
)
static

Splits Vorbis private data into the 3 setup packets.

https://matroska.org/technical/specs/codecid/index.html details how the Vorbis private data is packed.

Definition at line 1111 of file MatroskaParser.cpp.

1113 {
1114  try {
1115  XiphLacing laced(codecprivatedata, datalength);
1116 
1117  std::vector<std::tuple<const uint8_t*, size_t>> result;
1118 
1119  if(laced.ItemCount != 3) {
1120  LOG_WARNING("MatroskaParser: Vorbis private data: expected packet count to be 3, "
1121  "but got: " +
1122  std::to_string(laced.ItemCount));
1123  }
1124 
1125  const uint8_t* currentData = codecprivatedata + laced.LacedHeaderLenght;
1126 
1127  for(int i = 0; i < laced.ItemCount; ++i) {
1128  const auto size = laced.ItemSizes[i];
1129 
1130  if(currentData + size - codecprivatedata > static_cast<std::ptrdiff_t>(datalength))
1131  throw InvalidArgument(
1132  "there isn't enough data in the buffer for the current item");
1133 
1134  result.push_back(std::make_tuple(currentData, size));
1135  currentData += size;
1136  }
1137 
1138  return result;
1139 
1140  } catch(const InvalidArgument& e) {
1141  LOG_ERROR(
1142  "MatroskaParser: SplitVorbisPrivateSetupData: data is malformed, exception: ");
1143  e.PrintToLog();
1144  return {};
1145  }
1146 }
#define LOG_ERROR(x)
Definition: Define.h:92
#define LOG_WARNING(x)
Definition: Define.h:91
unsigned char uint8_t
Definition: core.h:38
virtual DLLEXPORT void PrintToLog() const noexcept
Definition: Exceptions.cpp:35

Member Data Documentation

◆ CODEC_TYPE_AV1

constexpr auto Leviathan::MatroskaParser::CODEC_TYPE_AV1 = "V_AV1"
static

Definition at line 43 of file MatroskaParser.h.

◆ CODEC_TYPE_OPUS

constexpr auto Leviathan::MatroskaParser::CODEC_TYPE_OPUS = "A_OPUS"
static

Definition at line 45 of file MatroskaParser.h.

◆ CODEC_TYPE_VORBIS

constexpr auto Leviathan::MatroskaParser::CODEC_TYPE_VORBIS = "A_VORBIS"
static

Definition at line 44 of file MatroskaParser.h.

◆ MATROSKA_DURATION_TO_SECONDS

constexpr float Leviathan::MatroskaParser::MATROSKA_DURATION_TO_SECONDS = 0.001f
static

Definition at line 48 of file MatroskaParser.h.


The documentation for this class was generated from the following files: