diff --git a/Box.go b/Box.go index 6e7da36..000bf43 100644 --- a/Box.go +++ b/Box.go @@ -28,6 +28,10 @@ const ( BoxTypeMovie = "moov" // BoxTypeMovieHeader Movie Header Box BoxTypeMovieHeader = "mvhd" + // BoxTypeTrackBox Track Box + BoxTypeTrackBox = "trak" + // BoxTypeTrackHeaderBox Track Header Box + BoxTypeTrackHeaderBox = "tkhd" // BoxTypeMovieFragment Movie Fragment Box BoxTypeMovieFragment = "moof" // BoxTypeMovieFragmentHeader Movie Fragment Header Box diff --git a/Parser.go b/Parser.go index 06b68ec..237114e 100644 --- a/Parser.go +++ b/Parser.go @@ -112,6 +112,10 @@ func parseNextBox(reader io.Reader, filePosition uint64) (box interface{}, endPo box, err = ParseMovieBox(filePosition, boxHeaderSize, boxContentBytes) case BoxTypeMovieHeader: box = ParseMovieHeaderBox(filePosition, boxHeaderSize, boxContentBytes) + case BoxTypeTrackBox: + box, err = ParseTrackBox(filePosition, boxHeaderSize, boxContentBytes) + case BoxTypeTrackHeaderBox: + box = ParseTrackHeaderBox(filePosition, boxHeaderSize, boxContentBytes) case BoxTypeMovieFragment: box, err = ParseMovieFragmentBox(filePosition, boxHeaderSize, boxContentBytes) case BoxTypeMovieFragmentHeader: diff --git a/README.md b/README.md index 410bd2a..8b3ecef 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,8 @@ Implementation progress | 8.1.3 Progressive Download Information Box | pdin | - | | 8.2.1 Movie Box | moov | 100% | | 8.2.2 Movie Header Box | mvhd | 100% | -| 8.3.1 Track Box | trak | - | -| 8.3.2 Track Header Box | tkhd | - | +| 8.3.1 Track Box | trak | 100% | +| 8.3.2 Track Header Box | tkhd | 100% | | 8.3.3 Track Reference Box | tref | - | | 8.3.4 Track Group Box | trgr | - | | 8.4.1 Media Box | mdia | - | diff --git a/TrackBox.go b/TrackBox.go new file mode 100644 index 0000000..69bc9b4 --- /dev/null +++ b/TrackBox.go @@ -0,0 +1,48 @@ +// Copyright 2024 Martin Riedl +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the 
License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gomp4 + +// TrackBox track box struct +// +// 8.3.1 Track Box +// Box Type: ‘trak’ +// Container: Movie Box (‘moov’) +// Mandatory: Yes +// Quantity: One or more +// +// This is a container box for a single track of a presentation. A presentation consists of one or more tracks. +// Each track is independent of the other tracks in the presentation and carries its own temporal and +// spatial information. Each track will contain its associated Media Box. +// +// Tracks are used for two purposes: (a) to contain media data (media tracks) and (b) to contain +// packetization information for streaming protocols (hint tracks). +// +// There shall be at least one media track within an ISO file, and all the media tracks that contributed to +// the hint tracks shall remain in the file, even if the media data within them is not referenced by the hint +// tracks; after deleting all hint tracks, the entire un‐hinted presentation shall remain. 
+type TrackBox struct {
+	// Box is built from the file position and header size handed to ParseTrackBox.
+	*Box
+	// ChildBoxes holds whatever parseChildBoxes produced from the content of this box.
+	ChildBoxes []interface{}
+}
+
+// ParseTrackBox builds the TrackBox for the box located at filePosition and
+// parses content into its child boxes.
+//
+// The box is always returned, together with any error reported while parsing
+// the children.
+func ParseTrackBox(filePosition uint64, headerSize uint32, content []byte) (*TrackBox, error) {
+	track := &TrackBox{Box: &Box{filePosition, headerSize}}
+	children, err := track.parseChildBoxes(filePosition, content)
+	track.ChildBoxes = children
+	return track, err
+}
diff --git a/TrackHeaderBox.go b/TrackHeaderBox.go
new file mode 100644
index 0000000..d81dc34
--- /dev/null
+++ b/TrackHeaderBox.go
@@ -0,0 +1,194 @@
+// Copyright 2024 Martin Riedl
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gomp4
+
+import (
+	"encoding/binary"
+	"time"
+)
+
+// TrackHeaderBox track header box struct
+//
+// 8.3.2 Track Header Box
+//
+// Box Type: ‘tkhd’
+// Container: Track Box (‘trak’)
+// Mandatory: Yes
+// Quantity: Exactly one
+//
+// This box specifies the characteristics of a single track. Exactly one Track Header Box is contained in a
+// track.
+//
+// In the absence of an edit list, the presentation of a track starts at the beginning of the overall
+// presentation. An empty edit is used to offset the start time of a track.
+//
+// The default value of the track header flags for media tracks is 7 (track_enabled, track_in_movie,
+// track_in_preview). If in a presentation all tracks have neither track_in_movie nor track_in_preview set,
+// then all tracks shall be treated as if both flags were set on all tracks.
+// Server hint tracks should have the
+// track_in_movie and track_in_preview set to 0, so that they are ignored for local playback and
+// preview.
+//
+// Under the ‘iso3’ brand or brands that share its requirements, the width and height in the track header
+// are measured on a notional 'square' (uniform) grid. Track video data is normalized to these dimensions
+// (logically) before any transformation or placement caused by a layup or composition system. Track
+// (and movie) matrices, if used, also operate in this uniformly-scaled space.
+//
+// The duration field here does not include the duration of following movie fragments, if any, but only of
+// the media in the enclosing Movie Box. The Movie Extends Header box may be used to document the
+// duration including movie fragments, when desired and possible.
+//
+// Creation time, modification time and duration exist in a 32-bit (box version 0) and a 64-bit
+// (box version 1) encoding. ParseTrackHeaderBox fills only the ...V1 fields when the version is 1 and
+// only the ...V0 fields otherwise; the fields of the other encoding keep their zero value.
+type TrackHeaderBox struct {
+	*FullBox
+	// CreationTimeV0 is the creation time of this track, read from the 32-bit field of a
+	// version 0 box (an integer in seconds since midnight, Jan. 1, 1904, in UTC time) and
+	// converted with since1904ToTime.
+	CreationTimeV0 time.Time
+	// CreationTimeV1 is the creation time of this track, read from the 64-bit field of a
+	// version 1 box (an integer in seconds since midnight, Jan. 1, 1904, in UTC time) and
+	// converted with since1904ToTime.
+	CreationTimeV1 time.Time
+	// ModificationTimeV0 is the most recent time the track was modified, read from the 32-bit
+	// field of a version 0 box (an integer in seconds since midnight, Jan. 1, 1904, in UTC time)
+	// and converted with since1904ToTime.
+	ModificationTimeV0 time.Time
+	// ModificationTimeV1 is the most recent time the track was modified, read from the 64-bit
+	// field of a version 1 box (an integer in seconds since midnight, Jan. 1, 1904, in UTC time)
+	// and converted with since1904ToTime.
+	ModificationTimeV1 time.Time
+	// TrackID is an integer that uniquely identifies this track over the entire life-time of this
+	// presentation. Track IDs are never re-used and cannot be zero.
+	TrackID uint32
+	// DurationV0 is the 32-bit duration of a version 0 box. It is an integer that indicates the
+	// duration of this track (in the timescale indicated in the Movie Header Box). The value of this
+	// field is equal to the sum of the durations of all of the track’s edits. If there is no edit list,
+	// then the duration is the sum of the sample durations, converted into the timescale in the
+	// Movie Header Box. If the duration of this track cannot be determined then duration is set to
+	// all 1s.
+	DurationV0 uint32
+	// DurationV1 is the 64-bit duration of a version 1 box. It is an integer that indicates the
+	// duration of this track (in the timescale indicated in the Movie Header Box). The value of this
+	// field is equal to the sum of the durations of all of the track’s edits. If there is no edit list,
+	// then the duration is the sum of the sample durations, converted into the timescale in the
+	// Movie Header Box. If the duration of this track cannot be determined then duration is set to
+	// all 1s.
+	DurationV1 uint64
+	// Layer specifies the front-to-back ordering of video tracks; tracks with lower numbers are closer
+	// to the viewer. 0 is the normal value, and -1 would be in front of track 0, and so on.
+	Layer int16
+	// AlternativeGroup is an integer that specifies a group or collection of tracks. If this field is 0
+	// there is no information on possible relations to other tracks. If this field is not 0, it should be the
+	// same for tracks that contain alternate data for one another and different for tracks belonging to
+	// different such groups. Only one track within an alternate group should be played or streamed at
+	// any one time, and must be distinguishable from other tracks in the group via attributes such as
+	// bitrate, codec, language, packet size etc. A group may have only one member.
+	AlternativeGroup int16
+	// Volume is a fixed 8.8 value specifying the track's relative audio volume. Full volume is 1.0
+	// (0x0100) and is the normal value. Its value is irrelevant for a purely visual track. Tracks may be
+	// composed by combining them according to their volume, and then using the overall Movie
+	// Header Box volume setting; or more complex audio composition (e.g. MPEG-4 BIFS) may be
+	// used.
+	Volume Fixed88
+	// Matrix provides a transformation matrix for the video; (u,v,w) are restricted here to (0,0,1), hex
+	// (0,0,0x40000000). After a successful parse it holds the 9 entries in the order they are stored in
+	// the box.
+	Matrix []int32
+	// Width is a fixed-point 16.16 value. Width and height are track-dependent as follows:
+	//
+	// For text and subtitle tracks, they may, depending on the coding format, describe the suggested
+	// size of the rendering area. For such tracks, the value 0x0 may also be used to indicate that the
+	// data may be rendered at any size, that no preferred size has been indicated and that the actual
+	// size may be determined by the external context or by reusing the width and height of another
+	// track. For those tracks, the flag track_size_is_aspect_ratio may also be used.
+	// For non-visual tracks (e.g. audio), they should be set to zero.
+	// For all other tracks, they specify the track's visual presentation size. These need not be the same
+	// as the pixel dimensions of the images, which is documented in the sample description(s); all
+	// images in the sequence are scaled to this size, before any overall transformation of the track
+	// represented by the matrix. The pixel dimensions of the images are the default values.
+	Width Fixed1616
+	// Height is a fixed-point 16.16 value. Width and height are track-dependent as follows:
+	//
+	// For text and subtitle tracks, they may, depending on the coding format, describe the suggested
+	// size of the rendering area. For such tracks, the value 0x0 may also be used to indicate that the
+	// data may be rendered at any size, that no preferred size has been indicated and that the actual
+	// size may be determined by the external context or by reusing the width and height of another
+	// track. For those tracks, the flag track_size_is_aspect_ratio may also be used.
+	// For non-visual tracks (e.g. audio), they should be set to zero.
+	// For all other tracks, they specify the track's visual presentation size. These need not be the same
+	// as the pixel dimensions of the images, which is documented in the sample description(s); all
+	// images in the sequence are scaled to this size, before any overall transformation of the track
+	// represented by the matrix. The pixel dimensions of the images are the default values.
+	Height Fixed1616
+}
+
+// Bit masks of the track header flags.
+const (
+	// TrackHeaderBoxFlagTrackEnabled Track Enabled Flag
+	// Indicates that the track is enabled. Flag value is 0x000001. A disabled track (the
+	// low bit is zero) is treated as if it were not present.
+	TrackHeaderBoxFlagTrackEnabled uint32 = 0x00000001
+
+	// TrackHeaderBoxFlagTrackInMovie Track in Movie
+	// Indicates that the track is used in the presentation. Flag value is 0x000002.
+	TrackHeaderBoxFlagTrackInMovie uint32 = 0x00000002
+
+	// TrackHeaderBoxFlagTrackInPreview Track in Preview
+	// Indicates that the track is used when previewing the presentation. Flag value
+	// is 0x000004.
+	TrackHeaderBoxFlagTrackInPreview uint32 = 0x00000004
+
+	// TrackHeaderBoxFlagTrackSizeIsAspectRatio Track Size is Aspect Ratio
+	// Indicates that the width and height fields are not expressed in
+	// pixel units. The values have the same units but these units are not specified. The values are
+	// only an indication of the desired aspect ratio. If the aspect ratios of this track and other
+	// related tracks are not identical, then the respective positioning of the tracks is undefined,
+	// possibly defined by external contexts. Flag value is 0x000008.
+	TrackHeaderBoxFlagTrackSizeIsAspectRatio uint32 = 0x00000008
+)
+
+// ParseTrackHeaderBox creates a new Track Header Box struct from content, the
+// bytes that follow the box header.
+//
+// Layout of content: 4 bytes version and flags, the version dependent fields
+// (creation time, modification time, track ID, a reserved uint32 and the
+// duration; times and duration are 64 bit wide for version 1 and 32 bit wide
+// otherwise), 2 reserved uint32, layer, alternate group, volume, 2 reserved
+// bytes, the 9 matrix entries, width and height. That adds up to 96 bytes for
+// version 1 and 84 bytes for every other version.
+//
+// The function has no error return, so truncated input cannot be reported. If
+// content is shorter than the layout selected by the version, only the
+// embedded FullBox is populated and every other field keeps its zero value
+// (TrackID 0 is not a valid track ID, so callers can use it to spot such a
+// box). Content shorter than 4 bytes is zero padded before it is handed to
+// newFullBox.
+func ParseTrackHeaderBox(filePosition uint64, headerSize uint32, content []byte) *TrackHeaderBox {
+	const (
+		// version (1 byte) + flags (3 bytes)
+		fullBoxHeaderSize = 4
+		// creation time, modification time, track ID, reserved and duration, 32 bit each
+		versionedSizeV0 = 5 * 4
+		// creation time, modification time and duration with 64 bit, track ID and reserved with 32 bit
+		versionedSizeV1 = 3*8 + 2*4
+		// number of int32 entries of the transformation matrix
+		matrixEntries = 9
+		// 2 reserved uint32, layer/alternate group/volume/reserved (2 bytes each), matrix, width and height
+		tailSize = 2*4 + 4*2 + matrixEntries*4 + 2*4
+	)
+
+	// newFullBox always receives exactly 4 bytes; pad a truncated header with
+	// zeros so that slicing it cannot panic
+	fullBoxHeader := content
+	if len(fullBoxHeader) < fullBoxHeaderSize {
+		fullBoxHeader = make([]byte, fullBoxHeaderSize)
+		copy(fullBoxHeader, content)
+	}
+	box := &TrackHeaderBox{
+		FullBox: newFullBox(&Box{filePosition, headerSize}, fullBoxHeader[0:fullBoxHeaderSize]),
+	}
+
+	// all offsets below are fixed once the version is known, so a single length
+	// check protects every slice expression that follows
+	requiredSize := fullBoxHeaderSize + versionedSizeV0 + tailSize
+	if box.Version == 1 {
+		requiredSize = fullBoxHeaderSize + versionedSizeV1 + tailSize
+	}
+	if len(content) < requiredSize {
+		return box
+	}
+
+	position := fullBoxHeaderSize
+	if box.Version == 1 {
+		box.CreationTimeV1 = since1904ToTime(int64(binary.BigEndian.Uint64(content[4:12])))
+		box.ModificationTimeV1 = since1904ToTime(int64(binary.BigEndian.Uint64(content[12:20])))
+		box.TrackID = binary.BigEndian.Uint32(content[20:24])
+		// uint32 reserved
+		box.DurationV1 = binary.BigEndian.Uint64(content[28:36])
+		position += versionedSizeV1
+	} else { // version == 0
+		box.CreationTimeV0 = since1904ToTime(int64(binary.BigEndian.Uint32(content[4:8])))
+		box.ModificationTimeV0 = since1904ToTime(int64(binary.BigEndian.Uint32(content[8:12])))
+		box.TrackID = binary.BigEndian.Uint32(content[12:16])
+		// uint32 reserved
+		box.DurationV0 = binary.BigEndian.Uint32(content[20:24])
+		position += versionedSizeV0
+	}
+
+	// skip 2 reserved uint32
+	position += 4 + 4
+
+	// parse next fields
+	box.Layer = int16(binary.BigEndian.Uint16(content[position : position+2]))
+	box.AlternativeGroup = int16(binary.BigEndian.Uint16(content[position+2 : position+4]))
+	box.Volume = NewFixed88ByBytes(content[position+4 : position+6])
+	position += 2 + 2 + 2 + 2 // 2 bytes for layer, 2 bytes for alternate group, 2 bytes for volume and 2 bytes reserved
+
+	// parse matrix
+	box.Matrix = make([]int32, 0, matrixEntries)
+	for i := 0; i < matrixEntries; i++ {
+		box.Matrix = append(box.Matrix, int32(binary.BigEndian.Uint32(content[position:position+4])))
+		position += 4
+	}
+
+	// parse width/height
+	box.Width = NewFixed1616ByBytes(content[position : position+4])
+	box.Height = NewFixed1616ByBytes(content[position+4 : position+8])
+
+	return box
+}