// Copyright 2024 Martin Riedl // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package gomp4 import ( "encoding/binary" "fmt" ) // SampleDescriptionBox sample description box struct // // 8.5.2 Sample Description Box // // Box Types: ‘stsd’ // Container: Sample Table Box (‘stbl’) // Mandatory: Yes // Quantity: Exactly one // // The sample description table gives detailed information about the coding type used, and any // initialization information needed for that coding. // // The information stored in the sample description box after the entry‐count is both track‐type specific // as documented here, and can also have variants within a track type (e.g. different codings may use // different specific information after some common fields, even within a video track). // // Which type of sample entry form is used is determined by the media handler, using a suitable form, // such as one defined in clause 12, or defined in a derived specification, or registration. // // Multiple descriptions may be used within a track. // // If the ‘format’ field of a SampleEntry is unrecognized, neither the sample description itself, nor the // associated media samples, shall be decoded. // // All string fields shall be null‐terminated, even if unused. “Optional” means there is at least one null byte. // // Entries that identify the format by MIME type, such as a TextSubtitleSampleEntry, // TextMetaDataSampleEntry, or SimpleTextSampleEntry, all of which contain a MIME type, may be used // to identify the format of streams for which a MIME type applies. A MIME type applies if the contents of // the string in the optional configuration box (without its null termination), followed by the contents of a // set of samples, starting with a sync sample and ending at the sample immediately preceding a sync // sample, are concatenated in their entirety, and the result meets the decoding requirements for // documents of that MIME type. Non‐sync samples should be used only if that format specifies the // behaviour of ‘progressive decoding’, and then the sample times indicate when the results of such // progressive decoding should be presented (according to the media type). // // In some classes derived from SampleEntry, namespace and schema_location are used both to identify // the XML document content and to declare “brand” or profile compatibility. Multiple namespace // identifiers indicate that the track conforms to the specification represented by each of the identifiers, // some of which may identify supersets of the features present. A decoder should be able to decode all the // namespaces in order to be able to decode and present correctly the media associated with this sample // entry. type SampleDescriptionBox struct { *FullBox // is an integer that gives the number of entries in the following table EntryCount uint32 ChildBoxes []any } // BoxTypeSampleDescription Sample Description Box const BoxTypeSampleDescription = "stsd" func init() { BoxDefinitions = append(BoxDefinitions, BoxDefinition{ Type: BoxTypeSampleDescription, ParentTypes: []string{BoxTypeSampleTable}, Parser: ParseSampleDescriptionBox, }) } // ParseSampleDescriptionBox creates a new sample description box struct based on bytes func ParseSampleDescriptionBox(parser *Parser, filePosition uint64, headerSize uint32, content []byte) (any, error) { box := &SampleDescriptionBox{ FullBox: newFullBox(&Box{filePosition, headerSize}, content[0:4]), } // entry counter box.EntryCount = binary.BigEndian.Uint32(content[4:8]) // parse child boxes var err error box.ChildBoxes, err = box.parseChildBoxes(parser, BoxTypeSampleDescription, filePosition, content[8:]) if err != nil { return box, err } // validate entry count if int(box.EntryCount) != len(box.ChildBoxes) { return box, fmt.Errorf("invalid amount of boxes at %d; found %d but expected %d", filePosition, len(box.ChildBoxes), box.EntryCount) } return box, nil } type SampleEntry struct { *Box // is an integer that contains the index of the data reference to use to // retrieve data associated with samples that use this sample description. Data references are // stored in Data Reference Boxes. The index ranges from 1 to the number of data references. DataReferenceIndex uint16 } func ParseSampleEntry(filePosition uint64, headerSize uint32, content []byte) (*SampleEntry, int) { box := &SampleEntry{ Box: &Box{filePosition, headerSize}, } // skip reserved position := 6 // parse reference index box.DataReferenceIndex = binary.BigEndian.Uint16(content[position : position+2]) position += 2 return box, position } // VisualSampleEntry visual sample entry struct // // 12.1.3 Sample entry // // Video tracks use VisualSampleEntry. // // In video tracks, the frame_count field must be 1 unless the specification for the media format explicitly // documents this template field and permits larger values. That specification must document both how // the individual frames of video are found (their size information) and their timing established. That // timing might be as simple as dividing the sample duration by the frame count to establish the frame // duration. type VisualSampleEntry struct { *SampleEntry Width uint16 Height uint16 // give the resolution of the image in pixels‐per‐inch, as a fixed 16.16 number HorizResolution Fixed1616 // give the resolution of the image in pixels‐per‐inch, as a fixed 16.16 number VertResolution Fixed1616 // indicates how many frames of compressed video are stored in each sample. The // default is 1, for one frame per sample; it may be more than 1 for multiple frames per sample FrameCount uint16 // is a name, for informative purposes. It is formatted in a fixed 32‐byte field, with //the first byte set to the number of bytes to be displayed, followed by that number of bytes of //displayable data, and then padding to complete 32 bytes total (including the size byte). The field //may be set to 0. CompressorName string Depth uint16 } func ParseVisualSampleEntry(filePosition uint64, headerSize uint32, content []byte) (*VisualSampleEntry, int) { sampleEntry, position := ParseSampleEntry(filePosition, headerSize, content) box := &VisualSampleEntry{ SampleEntry: sampleEntry, } // skip pre-defined and reserved bytes position += 2 + 2 + 4*3 // parse resolution box.Width = binary.BigEndian.Uint16(content[position : position+2]) box.Height = binary.BigEndian.Uint16(content[position+2 : position+4]) box.HorizResolution = NewFixed1616ByBytes(content[position+4 : position+8]) box.VertResolution = NewFixed1616ByBytes(content[position+8 : position+12]) position += 12 // skip reserved bytes position += 4 // parse frame count box.FrameCount = binary.BigEndian.Uint16(content[position : position+2]) position += 2 // parse compressor name length (32bytes total) compressorNameLength := int(content[position]) box.CompressorName = string(content[position+1 : position+1+compressorNameLength]) position += 32 // parse depth box.Depth = binary.BigEndian.Uint16(content[position : position+2]) position += 2 // skip pre-defined bytes position += 2 return box, position }