mp4/TrackHeaderBox.go
2024-12-03 19:24:00 +01:00

205 lines
10 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2024 Martin Riedl
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gomp4
import (
	"encoding/binary"
	"fmt"
	"time"
)
// TrackHeaderBox track header box struct
//
// 8.3.2 Track Header Box
//
// Box Type: tkhd
// Container: Track Box (trak)
// Mandatory: Yes
// Quantity: Exactly one
//
// This box specifies the characteristics of a single track. Exactly one Track Header Box is contained in a
// track.
//
// In the absence of an edit list, the presentation of a track starts at the beginning of the overall
// presentation. An empty edit is used to offset the start time of a track.
//
// The default value of the track header flags for media tracks is 7 (track_enabled, track_in_movie,
// track_in_preview). If in a presentation all tracks have neither track_in_movie nor track_in_preview set,
// then all tracks shall be treated as if both flags were set on all tracks. Server hint tracks should have the
// track_in_movie and track_in_preview set to 0, so that they are ignored for local playback and
// preview.
//
// Under the iso3 brand or brands that share its requirements, the width and height in the track header
// are measured on a notional 'square' (uniform) grid. Track video data is normalized to these dimensions
// (logically) before any transformation or placement caused by a layup or composition system. Track
// (and movie) matrices, if used, also operate in this uniformly-scaled space.
//
// The duration field here does not include the duration of following movie fragments, if any, but only of
// the media in the enclosing Movie Box. The Movie Extends Header box may be used to document the
// duration including movie fragments, when desired and possible.
//
// Fields with a V0/V1 suffix exist in two widths in the file format. ParseTrackHeaderBox fills the V1
// variant when the box version is 1 and the V0 variant otherwise; the other variant keeps its zero value.
type TrackHeaderBox struct {
*FullBox
// CreationTimeV0 is the creation time of this track. In the file it is an integer (in seconds since
// midnight, Jan. 1, 1904, in UTC time). Read from the 32-bit field when the box version is not 1.
CreationTimeV0 time.Time
// CreationTimeV1 is the creation time of this track. In the file it is an integer (in seconds since
// midnight, Jan. 1, 1904, in UTC time). Read from the 64-bit field when the box version is 1.
CreationTimeV1 time.Time
// ModificationTimeV0 is the most recent time the track was modified. In the file it is an integer (in
// seconds since midnight, Jan. 1, 1904, in UTC time). Read from the 32-bit field when the box version
// is not 1.
ModificationTimeV0 time.Time
// ModificationTimeV1 is the most recent time the track was modified. In the file it is an integer (in
// seconds since midnight, Jan. 1, 1904, in UTC time). Read from the 64-bit field when the box version
// is 1.
ModificationTimeV1 time.Time
// TrackID is an integer that uniquely identifies this track over the entire lifetime of this
// presentation. Track IDs are never reused and cannot be zero.
TrackID uint32
// DurationV0 is an integer that indicates the duration of this track (in the timescale indicated in the
// Movie Header Box). The value of this field is equal to the sum of the durations of all of the track's
// edits. If there is no edit list, then the duration is the sum of the sample durations, converted into
// the timescale in the Movie Header Box. If the duration of this track cannot be determined then
// duration is set to all 1s. Read from the 32-bit field when the box version is not 1.
DurationV0 uint32
// DurationV1 is an integer that indicates the duration of this track (in the timescale indicated in the
// Movie Header Box). The value of this field is equal to the sum of the durations of all of the track's
// edits. If there is no edit list, then the duration is the sum of the sample durations, converted into
// the timescale in the Movie Header Box. If the duration of this track cannot be determined then
// duration is set to all 1s. Read from the 64-bit field when the box version is 1.
DurationV1 uint64
// Layer specifies the front-to-back ordering of video tracks; tracks with lower numbers are closer
// to the viewer. 0 is the normal value, and -1 would be in front of track 0, and so on.
Layer int16
// AlternativeGroup is an integer that specifies a group or collection of tracks. If this field is 0
// there is no information on possible relations to other tracks. If this field is not 0, it should be the
// same for tracks that contain alternate data for one another and different for tracks belonging to
// different such groups. Only one track within an alternate group should be played or streamed at
// any one time, and must be distinguishable from other tracks in the group via attributes such as
// bitrate, codec, language, packet size etc. A group may have only one member.
AlternativeGroup int16
// Volume is a fixed 8.8 value specifying the track's relative audio volume. Full volume is 1.0
// (0x0100) and is the normal value. Its value is irrelevant for a purely visual track. Tracks may be
// composed by combining them according to their volume, and then using the overall Movie
// Header Box volume setting; or more complex audio composition (e.g. MPEG-4 BIFS) may be
// used.
Volume Fixed88
// Matrix provides a transformation matrix for the video; (u,v,w) are restricted here to (0,0,1), hex
// (0,0,0x40000000). ParseTrackHeaderBox stores the nine 32-bit entries in the order they appear in
// the box.
Matrix []int32
// Width is a fixed-point 16.16 value whose meaning is track-dependent as follows:
//
// For text and subtitle tracks, it may, depending on the coding format, describe the suggested
// size of the rendering area. For such tracks, the value 0x0 may also be used to indicate that the
// data may be rendered at any size, that no preferred size has been indicated and that the actual
// size may be determined by the external context or by reusing the width and height of another
// track. For those tracks, the flag track_size_is_aspect_ratio may also be used.
// For non-visual tracks (e.g. audio), it should be set to zero.
// For all other tracks, it specifies the track's visual presentation size. This need not be the same
// as the pixel dimensions of the images, which is documented in the sample description(s); all
// images in the sequence are scaled to this size, before any overall transformation of the track
// represented by the matrix. The pixel dimensions of the images are the default values.
Width Fixed1616
// Height is a fixed-point 16.16 value whose meaning is track-dependent as follows:
//
// For text and subtitle tracks, it may, depending on the coding format, describe the suggested
// size of the rendering area. For such tracks, the value 0x0 may also be used to indicate that the
// data may be rendered at any size, that no preferred size has been indicated and that the actual
// size may be determined by the external context or by reusing the width and height of another
// track. For those tracks, the flag track_size_is_aspect_ratio may also be used.
// For non-visual tracks (e.g. audio), it should be set to zero.
// For all other tracks, it specifies the track's visual presentation size. This need not be the same
// as the pixel dimensions of the images, which is documented in the sample description(s); all
// images in the sequence are scaled to this size, before any overall transformation of the track
// represented by the matrix. The pixel dimensions of the images are the default values.
Height Fixed1616
}
// Track header flag bits. Each constant is a single bit of the 24-bit flags
// value of the box and can be combined with the others using bitwise OR.
const (
	// TrackHeaderBoxFlagTrackEnabled marks the track as enabled (bit 0, 0x000001).
	// A disabled track (the low bit is zero) is treated as if it were not present.
	TrackHeaderBoxFlagTrackEnabled uint32 = 1 << 0

	// TrackHeaderBoxFlagTrackInMovie marks the track as used in the presentation
	// (bit 1, 0x000002).
	TrackHeaderBoxFlagTrackInMovie uint32 = 1 << 1

	// TrackHeaderBoxFlagTrackInPreview marks the track as used when previewing the
	// presentation (bit 2, 0x000004).
	TrackHeaderBoxFlagTrackInPreview uint32 = 1 << 2

	// TrackHeaderBoxFlagTrackSizeIsAspectRatio signals that the width and height
	// fields are not expressed in pixel units (bit 3, 0x000008). The values have
	// the same units but these units are not specified; they are only an
	// indication of the desired aspect ratio. If the aspect ratios of this track
	// and other related tracks are not identical, then the respective positioning
	// of the tracks is undefined, possibly defined by external contexts.
	TrackHeaderBoxFlagTrackSizeIsAspectRatio uint32 = 1 << 3
)
// BoxTypeTrackHeader is the four-character box type code of the Track Header Box ("tkhd").
// It is the type under which this file registers ParseTrackHeaderBox in BoxDefinitions.
const BoxTypeTrackHeader = "tkhd"
func init() {
BoxDefinitions = append(BoxDefinitions, BoxDefinition{
Type: BoxTypeTrackHeader,
ParentTypes: []string{BoxTypeTrack},
Parser: ParseTrackHeaderBox,
})
}
// ParseTrackHeaderBox creates a new Track Header Box struct from the raw box content.
//
// content must start with the 4-byte version/flags word of the full box. The
// fields that follow depend on the version:
//
//	version 1: creation_time(8) modification_time(8) track_ID(4) reserved(4) duration(8)
//	otherwise: creation_time(4) modification_time(4) track_ID(4) reserved(4) duration(4)
//
// and are followed in both cases by reserved(8), layer(2), alternate_group(2),
// volume(2), reserved(2), matrix(9*4), width(4) and height(4). All integers are
// read as big-endian.
//
// filePosition and headerSize are stored in the embedded Box. The parser
// argument is not used by this function.
//
// It returns the parsed *TrackHeaderBox, or an error when content is shorter
// than the layout of the declared version requires (instead of panicking on an
// out-of-range slice).
func ParseTrackHeaderBox(parser *Parser, filePosition uint64, headerSize uint32, content []byte) (any, error) {
	const (
		// version (1 byte) + flags (3 bytes)
		fullBoxSize = 4
		// creation time, modification time, track ID, reserved, duration (32-bit times/duration)
		timingSizeV0 = 4 + 4 + 4 + 4 + 4
		// creation time, modification time, track ID, reserved, duration (64-bit times/duration)
		timingSizeV1 = 8 + 8 + 4 + 4 + 8
		// reserved, layer, alternate group, volume, reserved, matrix, width, height
		tailSize = 8 + 2 + 2 + 2 + 2 + 9*4 + 4 + 4
	)

	// the version is needed to know the full layout, so validate the header word first
	if len(content) < fullBoxSize {
		return nil, fmt.Errorf("tkhd: content too short for version and flags: got %d bytes, need %d", len(content), fullBoxSize)
	}

	box := &TrackHeaderBox{
		FullBox: newFullBox(&Box{filePosition, headerSize}, content[0:4]),
	}

	// every version other than 1 is parsed with the version 0 layout
	required := fullBoxSize + timingSizeV0 + tailSize
	if box.Version == 1 {
		required = fullBoxSize + timingSizeV1 + tailSize
	}
	if len(content) < required {
		return nil, fmt.Errorf("tkhd: content too short for version %d: got %d bytes, need %d", box.Version, len(content), required)
	}

	position := fullBoxSize
	if box.Version == 1 {
		box.CreationTimeV1 = since1904ToTime(int64(binary.BigEndian.Uint64(content[4:12])))
		box.ModificationTimeV1 = since1904ToTime(int64(binary.BigEndian.Uint64(content[12:20])))
		box.TrackID = binary.BigEndian.Uint32(content[20:24])
		// content[24:28] is a reserved uint32
		box.DurationV1 = binary.BigEndian.Uint64(content[28:36])
		position += timingSizeV1
	} else { // version == 0
		box.CreationTimeV0 = since1904ToTime(int64(binary.BigEndian.Uint32(content[4:8])))
		box.ModificationTimeV0 = since1904ToTime(int64(binary.BigEndian.Uint32(content[8:12])))
		box.TrackID = binary.BigEndian.Uint32(content[12:16])
		// content[16:20] is a reserved uint32
		box.DurationV0 = binary.BigEndian.Uint32(content[20:24])
		position += timingSizeV0
	}

	// skip 2 reserved uint32
	position += 4 + 4

	// parse next fields
	box.Layer = int16(binary.BigEndian.Uint16(content[position : position+2]))
	box.AlternativeGroup = int16(binary.BigEndian.Uint16(content[position+2 : position+4]))
	box.Volume = NewFixed88ByBytes(content[position+4 : position+6])
	// 2 bytes for layer, 2 bytes for alternative group, 2 bytes for volume and 2 bytes reserved
	position += 2 + 2 + 2 + 2

	// parse matrix (nine 32-bit entries)
	box.Matrix = make([]int32, 0, 9)
	for i := 0; i < 9; i++ {
		box.Matrix = append(box.Matrix, int32(binary.BigEndian.Uint32(content[position:position+4])))
		position += 4
	}

	// parse width/height
	box.Width = NewFixed1616ByBytes(content[position : position+4])
	box.Height = NewFixed1616ByBytes(content[position+4 : position+8])

	return box, nil
}