mp4/SampleDescriptionBox.go

195 lines
7.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2024 Martin Riedl
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gomp4
import (
"encoding/binary"
"fmt"
)
// SampleDescriptionBox sample description box struct
//
// 8.5.2 Sample Description Box
//
// Box Types: stsd
// Container: Sample Table Box (stbl)
// Mandatory: Yes
// Quantity: Exactly one
//
// The sample description table gives detailed information about the coding type used, and any
// initialization information needed for that coding.
//
// The information stored in the sample description box after the entry-count is both track-type specific
// as documented here, and can also have variants within a track type (e.g. different codings may use
// different specific information after some common fields, even within a video track).
//
// Which type of sample entry form is used is determined by the media handler, using a suitable form,
// such as one defined in clause 12, or defined in a derived specification, or registration.
//
// Multiple descriptions may be used within a track.
//
// If the format field of a SampleEntry is unrecognized, neither the sample description itself, nor the
// associated media samples, shall be decoded.
//
// All string fields shall be null-terminated, even if unused. “Optional” means there is at least one null byte.
//
// Entries that identify the format by MIME type, such as a TextSubtitleSampleEntry,
// TextMetaDataSampleEntry, or SimpleTextSampleEntry, all of which contain a MIME type, may be used
// to identify the format of streams for which a MIME type applies. A MIME type applies if the contents of
// the string in the optional configuration box (without its null termination), followed by the contents of a
// set of samples, starting with a sync sample and ending at the sample immediately preceding a sync
// sample, are concatenated in their entirety, and the result meets the decoding requirements for
// documents of that MIME type. Non-sync samples should be used only if that format specifies the
// behaviour of progressive decoding, and then the sample times indicate when the results of such
// progressive decoding should be presented (according to the media type).
//
// In some classes derived from SampleEntry, namespace and schema_location are used both to identify
// the XML document content and to declare “brand” or profile compatibility. Multiple namespace
// identifiers indicate that the track conforms to the specification represented by each of the identifiers,
// some of which may identify supersets of the features present. A decoder should be able to decode all the
// namespaces in order to be able to decode and present correctly the media associated with this sample
// entry.
type SampleDescriptionBox struct {
// embedded full box; ParseSampleDescriptionBox builds it with newFullBox
// from the box position, the header size and the first 4 content bytes
*FullBox
// is an integer that gives the number of entries in the following table
EntryCount uint32
// child boxes parsed from the content that follows the entry count;
// ParseSampleDescriptionBox reports an error when their number differs
// from EntryCount
ChildBoxes []any
}
// BoxTypeSampleDescription is the box type identifier ("stsd") of the
// Sample Description Box.
const BoxTypeSampleDescription = "stsd"
func init() {
BoxDefinitions = append(BoxDefinitions, BoxDefinition{
Type: BoxTypeSampleDescription,
ParentTypes: []string{BoxTypeSampleTable},
Parser: ParseSampleDescriptionBox,
})
}
// ParseSampleDescriptionBox creates a new sample description box struct based on bytes.
//
// content is the box payload: its first 4 bytes are handed to newFullBox, the
// next 4 bytes hold the big-endian entry count, and the remainder is parsed as
// child boxes.
//
// A payload shorter than 8 bytes cannot hold these mandatory fields; in that
// case (nil, error) is returned instead of panicking on an out-of-range slice.
// If parsing the child boxes fails, or the number of parsed child boxes does
// not match the entry count, the box parsed so far is returned together with
// the error.
func ParseSampleDescriptionBox(parser *Parser, filePosition uint64, headerSize uint32, content []byte) (any, error) {
	// 4 bytes consumed by newFullBox + 4 bytes entry count
	const minContentSize = 8
	if len(content) < minContentSize {
		return nil, fmt.Errorf("sample description box at %d is too short; found %d bytes but expected at least %d", filePosition, len(content), minContentSize)
	}

	box := &SampleDescriptionBox{
		FullBox: newFullBox(&Box{filePosition, headerSize}, content[0:4]),
	}

	// entry counter
	box.EntryCount = binary.BigEndian.Uint32(content[4:8])

	// parse child boxes
	var err error
	box.ChildBoxes, err = box.parseChildBoxes(parser, BoxTypeSampleDescription, filePosition, content[8:])
	if err != nil {
		return box, err
	}

	// validate entry count
	if int(box.EntryCount) != len(box.ChildBoxes) {
		return box, fmt.Errorf("invalid amount of boxes at %d; found %d but expected %d", filePosition, len(box.ChildBoxes), box.EntryCount)
	}

	return box, nil
}
// SampleEntry holds the fields that ParseSampleEntry reads from the start of
// a sample entry; VisualSampleEntry embeds it and adds its own fields.
type SampleEntry struct {
// embedded box, created by ParseSampleEntry from the file position and header size
*Box
// is an integer that contains the index of the data reference to use to
// retrieve data associated with samples that use this sample description. Data references are
// stored in Data Reference Boxes. The index ranges from 1 to the number of data references.
DataReferenceIndex uint16
}
// ParseSampleEntry reads the leading sample entry fields from content.
//
// The first 6 bytes are reserved and skipped; the following 2 bytes are read
// as the big-endian data reference index. It returns the parsed entry and the
// offset just past these fields (always 8), so that a caller such as
// ParseVisualSampleEntry can continue reading from there.
//
// content must hold at least 8 bytes, otherwise the slice expression panics.
func ParseSampleEntry(filePosition uint64, headerSize uint32, content []byte) (*SampleEntry, int) {
	const (
		reservedSize           = 6
		dataReferenceIndexSize = 2
	)
	end := reservedSize + dataReferenceIndexSize

	entry := &SampleEntry{
		Box:                &Box{filePosition, headerSize},
		DataReferenceIndex: binary.BigEndian.Uint16(content[reservedSize:end]),
	}
	return entry, end
}
// VisualSampleEntry visual sample entry struct
//
// 12.1.3 Sample entry
//
// Video tracks use VisualSampleEntry.
//
// In video tracks, the frame_count field must be 1 unless the specification for the media format explicitly
// documents this template field and permits larger values. That specification must document both how
// the individual frames of video are found (their size information) and their timing established. That
// timing might be as simple as dividing the sample duration by the frame count to establish the frame
// duration.
type VisualSampleEntry struct {
// common sample entry fields, parsed by ParseSampleEntry
*SampleEntry
// width, read as a big-endian 16-bit value (presumably in pixels; confirm against the specification)
Width uint16
// height, read as a big-endian 16-bit value (presumably in pixels; confirm against the specification)
Height uint16
// give the resolution of the image in pixels-per-inch, as a fixed 16.16 number
HorizResolution Fixed1616
// give the resolution of the image in pixels-per-inch, as a fixed 16.16 number
VertResolution Fixed1616
// indicates how many frames of compressed video are stored in each sample. The
// default is 1, for one frame per sample; it may be more than 1 for multiple frames per sample
FrameCount uint16
// is a name, for informative purposes. It is formatted in a fixed 32-byte field, with
// the first byte set to the number of bytes to be displayed, followed by that number of bytes of
// displayable data, and then padding to complete 32 bytes total (including the size byte). The field
// may be set to 0.
CompressorName string
// big-endian 16-bit value read directly after the compressor name field
Depth uint16
}
// ParseVisualSampleEntry reads the visual sample entry fields from content.
//
// It first parses the common sample entry fields via ParseSampleEntry and then
// reads width, height, horizontal and vertical resolution, frame count,
// compressor name and depth, skipping the pre-defined and reserved bytes in
// between. It returns the parsed entry and the offset in content directly
// after the visual sample entry fields (78), where any further content starts.
//
// The compressor name is stored in a fixed 32-byte field whose first byte is
// the number of displayable bytes. That length byte is clamped to 31 so that a
// malformed value can never make the name extend past its own field into the
// depth and following bytes.
//
// The function has no error return: content must be long enough for all
// fields read here (at least 76 bytes), otherwise a slice expression panics.
func ParseVisualSampleEntry(filePosition uint64, headerSize uint32, content []byte) (*VisualSampleEntry, int) {
	// total size of the compressor name field, including the leading length byte
	const compressorNameFieldSize = 32

	sampleEntry, position := ParseSampleEntry(filePosition, headerSize, content)
	box := &VisualSampleEntry{
		SampleEntry: sampleEntry,
	}

	// skip pre-defined and reserved bytes
	position += 2 + 2 + 4*3

	// parse dimensions and resolution
	box.Width = binary.BigEndian.Uint16(content[position : position+2])
	box.Height = binary.BigEndian.Uint16(content[position+2 : position+4])
	box.HorizResolution = NewFixed1616ByBytes(content[position+4 : position+8])
	box.VertResolution = NewFixed1616ByBytes(content[position+8 : position+12])
	position += 12

	// skip reserved bytes
	position += 4

	// parse frame count
	box.FrameCount = binary.BigEndian.Uint16(content[position : position+2])
	position += 2

	// parse compressor name; the length byte may claim up to 255 bytes, but at
	// most 31 bytes of displayable data fit into the field after the length byte
	compressorNameLength := int(content[position])
	if compressorNameLength > compressorNameFieldSize-1 {
		compressorNameLength = compressorNameFieldSize - 1
	}
	box.CompressorName = string(content[position+1 : position+1+compressorNameLength])
	position += compressorNameFieldSize

	// parse depth
	box.Depth = binary.BigEndian.Uint16(content[position : position+2])
	position += 2

	// skip pre-defined bytes
	position += 2

	return box, position
}