// Copyright 2024 Martin Riedl // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package gomp4 import ( "encoding/binary" "time" ) // TrackHeaderBox track header box struct // // 8.3.2 Track Header Box // // Box Type: ‘tkhd’ // Container: Track Box (‘trak’) // Mandatory: Yes // Quantity: Exactly one // // This box specifies the characteristics of a single track. Exactly one Track Header Box is contained in a // track. // // In the absence of an edit list, the presentation of a track starts at the beginning of the overall // presentation. An empty edit is used to offset the start time of a track. // // The default value of the track header flags for media tracks is 7 (track_enabled, track_in_movie, // track_in_preview). If in a presentation all tracks have neither track_in_movie nor track_in_preview set, // then all tracks shall be treated as if both flags were set on all tracks. Server hint tracks should have the // track_in_movie and track_in_preview set to 0, so that they are ignored for local playback and // preview. // // Under the ‘iso3’ brand or brands that share its requirements, the width and height in the track header // are measured on a notional 'square' (uniform) grid. Track video data is normalized to these dimensions // (logically) before any transformation or placement caused by a layup or composition system. Track // (and movie) matrices, if used, also operate in this uniformly‐scaled space. // // The duration field here does not include the duration of following movie fragments, if any, but only of // the media in the enclosing Movie Box. The Movie Extends Header box may be used to document the // duration including movie fragments, when desired and possible. type TrackHeaderBox struct { *FullBox // is an integer that declares the creation time of this track (in seconds since // midnight, Jan. 1, 1904, in UTC time). CreationTimeV0 time.Time // is an integer that declares the creation time of this track (in seconds since // midnight, Jan. 1, 1904, in UTC time). CreationTimeV1 time.Time // is an integer that declares the most recent time the track was modified (in // seconds since midnight, Jan. 1, 1904, in UTC time). ModificationTimeV0 time.Time // is an integer that declares the most recent time the track was modified (in // seconds since midnight, Jan. 1, 1904, in UTC time). ModificationTimeV1 time.Time // is an integer that uniquely identifies this track over the entire life‐time of this // presentation. Track IDs are never re‐used and cannot be zero. TrackID uint32 // is an integer that indicates the duration of this track (in the timescale indicated in the // Movie Header Box). The value of this field is equal to the sum of the durations of all of the track’s // edits. If there is no edit list, then the duration is the sum of the sample durations, converted into // the timescale in the Movie Header Box. If the duration of this track cannot be determined then // duration is set to all 1s. DurationV0 uint32 // is an integer that indicates the duration of this track (in the timescale indicated in the // Movie Header Box). The value of this field is equal to the sum of the durations of all of the track’s // edits. If there is no edit list, then the duration is the sum of the sample durations, converted into // the timescale in the Movie Header Box. If the duration of this track cannot be determined then // duration is set to all 1s. DurationV1 uint64 // specifies the front‐to‐back ordering of video tracks; tracks with lower numbers are closer // to the viewer. 0 is the normal value, and ‐1 would be in front of track 0, and so on. Layer int16 // is an integer that specifies a group or collection of tracks. If this field is 0 // there is no information on possible relations to other tracks. If this field is not 0, it should be the // same for tracks that contain alternate data for one another and different for tracks belonging to // different such groups. Only one track within an alternate group should be played or streamed at // any one time, and must be distinguishable from other tracks in the group via attributes such as // bitrate, codec, language, packet size etc. A group may have only one member. AlternativeGroup int16 // is a fixed 8.8 value specifying the track's relative audio volume. Full volume is 1.0 // (0x0100) and is the normal value. Its value is irrelevant for a purely visual track. Tracks may be // composed by combining them according to their volume, and then using the overall Movie // Header Box volume setting; or more complex audio composition (e.g. MPEG‐4 BIFS) may be // used. Volume Fixed88 // provides a transformation matrix for the video; (u,v,w) are restricted here to (0,0,1), hex // (0,0,0x40000000). Matrix []int32 // fixed‐point 16.16 values are track‐dependent as follows: // // For text and subtitle tracks, they may, depending on the coding format, describe the suggested // size of the rendering area. For such tracks, the value 0x0 may also be used to indicate that the // data may be rendered at any size, that no preferred size has been indicated and that the actual // size may be determined by the external context or by reusing the width and height of another // track. For those tracks, the flag track_size_is_aspect_ratio may also be used. // For non‐visual tracks (e.g. audio), they should be set to zero. // For all other tracks, they specify the track's visual presentation size. These need not be the same // as the pixel dimensions of the images, which is documented in the sample description(s); all // images in the sequence are scaled to this size, before any overall transformation of the track // represented by the matrix. The pixel dimensions of the images are the default values. Width Fixed1616 // fixed‐point 16.16 values are track‐dependent as follows: // // For text and subtitle tracks, they may, depending on the coding format, describe the suggested // size of the rendering area. For such tracks, the value 0x0 may also be used to indicate that the // data may be rendered at any size, that no preferred size has been indicated and that the actual // size may be determined by the external context or by reusing the width and height of another // track. For those tracks, the flag track_size_is_aspect_ratio may also be used. // For non‐visual tracks (e.g. audio), they should be set to zero. // For all other tracks, they specify the track's visual presentation size. These need not be the same // as the pixel dimensions of the images, which is documented in the sample description(s); all // images in the sequence are scaled to this size, before any overall transformation of the track // represented by the matrix. The pixel dimensions of the images are the default values. Height Fixed1616 } const ( // TrackHeaderBoxFlagTrackEnabled Track Enabled Flag // Indicates that the track is enabled. Flag value is 0x000001. A disabled track (the // low bit is zero) is treated as if it were not present. TrackHeaderBoxFlagTrackEnabled uint32 = 0x00000001 // TrackHeaderBoxFlagTrackInMovie Track in Movie // Indicates that the track is used in the presentation. Flag value is 0x000002. TrackHeaderBoxFlagTrackInMovie uint32 = 0x00000002 // TrackHeaderBoxFlagTrackInPreview Track in Preview // Indicates that the track is used when previewing the presentation. Flag value // is 0x000004. TrackHeaderBoxFlagTrackInPreview uint32 = 0x00000004 // TrackHeaderBoxFlagTrackSizeIsAspectRatio Track Size is Aspect Ratio // Indicates that the width and height fields are not expressed in // pixel units. The values have the same units but these units are not specified. The values are // only an indication of the desired aspect ratio. If the aspect ratios of this track and other // related tracks are not identical, then the respective positioning of the tracks is undefined, // possibly defined by external contexts. Flag value is 0x000008. TrackHeaderBoxFlagTrackSizeIsAspectRatio uint32 = 0x00000008 ) // ParseTrackHeaderBox creates a new Track Header Box struct func ParseTrackHeaderBox(filePosition uint64, headerSize uint32, content []byte) *TrackHeaderBox { box := &TrackHeaderBox{ FullBox: newFullBox(&Box{filePosition, headerSize}, content[0:4]), } position := 4 if box.Version == 1 { box.CreationTimeV1 = since1904ToTime(int64(binary.BigEndian.Uint64(content[4:12]))) box.ModificationTimeV1 = since1904ToTime(int64(binary.BigEndian.Uint64(content[12:20]))) box.TrackID = binary.BigEndian.Uint32(content[20:24]) // uint32 reserved box.DurationV1 = binary.BigEndian.Uint64(content[28:36]) position += 32 } else { // version == 0 box.CreationTimeV0 = since1904ToTime(int64(binary.BigEndian.Uint32(content[4:8]))) box.ModificationTimeV0 = since1904ToTime(int64(binary.BigEndian.Uint32(content[8:12]))) box.TrackID = binary.BigEndian.Uint32(content[12:16]) // uint32 reserved box.DurationV0 = binary.BigEndian.Uint32(content[20:24]) position += 20 } // skip 2 reserved uint32 position += 4 + 4 // parse next fields box.Layer = int16(binary.BigEndian.Uint16(content[position : position+2])) box.AlternativeGroup = int16(binary.BigEndian.Uint16(content[position+2 : position+4])) box.Volume = NewFixed88ByBytes(content[position+4 : position+6]) position += 2 + 2 + 2 + 2 // 4 bytes for layer, 2 bytes for alternative group, 2 bytes for volume and 2 bytes reserved // parse matrix for i := 0; i < 9; i++ { box.Matrix = append(box.Matrix, int32(binary.BigEndian.Uint32(content[position:position+4]))) position += 4 } // parse width/height box.Width = NewFixed1616ByBytes(content[position : position+4]) box.Height = NewFixed1616ByBytes(content[position+4 : position+8]) return box }