From 309ecb9a7992e4689c5e17c32161bd610d242454 Mon Sep 17 00:00:00 2001 From: Martin Riedl Date: Sun, 8 Dec 2024 11:10:26 +0100 Subject: [PATCH] feat: new AVC box implementation --- AVCVideoStream.go | 51 +++++++++++++++++++++++++++ README.md | 10 ++++-- SampleDescriptionBox.go | 77 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 131 insertions(+), 7 deletions(-) create mode 100644 AVCVideoStream.go diff --git a/AVCVideoStream.go b/AVCVideoStream.go new file mode 100644 index 0000000..34eae11 --- /dev/null +++ b/AVCVideoStream.go @@ -0,0 +1,51 @@ +// Copyright 2024 Martin Riedl +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gomp4 + +type AVCSampleEntry struct { + *VisualSampleEntry + // TODO: some more +} + +// BoxTypeAVCSampleEntry1 AVC Sample Entry Box +const BoxTypeAVCSampleEntry1 = "avc1" + +// BoxTypeAVCSampleEntry3 AVC Sample Entry Box +const BoxTypeAVCSampleEntry3 = "avc3" + +func init() { + BoxDefinitions = append(BoxDefinitions, BoxDefinition{ + Type: BoxTypeAVCSampleEntry1, + ParentTypes: []string{BoxTypeSampleDescription}, + Parser: ParseAVCSampleEntry, + }, BoxDefinition{ + Type: BoxTypeAVCSampleEntry3, + ParentTypes: []string{BoxTypeSampleDescription}, + Parser: ParseAVCSampleEntry, + }) +} + +// ParseAVCSampleEntry creates a new AVC sample entry box struct based on bytes +func ParseAVCSampleEntry(parser *Parser, filePosition uint64, headerSize uint32, content []byte) (any, error) { + sampleEntry, position := ParseVisualSampleEntry(filePosition, headerSize, content) + box := &AVCSampleEntry{ + VisualSampleEntry: sampleEntry, + } + + // TODO: parse other fields + _ = position + + return box, nil +} diff --git a/README.md b/README.md index b2d4c01..0134af7 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![pipeline status](https://gitlab.com/martinr92/gomp4/badges/main/pipeline.svg)](https://gitlab.com/martinr92/gomp4/commits/main) [![coverage report](https://gitlab.com/martinr92/gomp4/badges/main/coverage.svg)](https://gitlab.com/martinr92/gomp4/commits/main) -mp4 implementation in golang based on spec ISO ICE 14496-12:2015 +mp4 implementation in golang based on spec ISO/IEC 14496-12:2015 ## Parser @@ -16,7 +16,7 @@ if err := parser.Parse(); err != nil { ## Progress -Implementation progress of ISO ICE 14496-12:2015: +Implementation progress of ISO/IEC 14496-12:2015: | Chapter | Box Types | Parser | |----------------------------------------------------|----------------|-------:| @@ -118,6 +118,12 @@ Implementation progress of ISO ICE 14496-12:2015: | 12.2.7 Audio stream loudness | ludt | - | | 12.4.2 Hint Media Header Box | hmhd | - | +Implementation progress of ISO/IEC 14496-15:2017: + +| Chapter | Box Types | Parser | +|----------------------------------------------------|------------------------------------------|-------:| +| 5.4.2 AVC video stream definition | avc1, avc2, avc3, avc4, avcC, m4ds, btrt | 10% | + ## Helper Tools - [Online MP4 file parser](https://www.onlinemp4parser.com/) diff --git a/SampleDescriptionBox.go b/SampleDescriptionBox.go index e061d7c..7334e86 100644 --- a/SampleDescriptionBox.go +++ b/SampleDescriptionBox.go @@ -17,7 +17,6 @@ package gomp4 import ( "encoding/binary" "fmt" - "log" ) // SampleDescriptionBox sample description box struct @@ -112,17 +111,85 @@ type SampleEntry struct { DataReferenceIndex uint16 } -func ParseSampleEntry(filePosition uint64, headerSize uint32, content []byte) *SampleEntry { +func ParseSampleEntry(filePosition uint64, headerSize uint32, content []byte) (*SampleEntry, int) { box := &SampleEntry{ Box: &Box{filePosition, headerSize}, } // skip reserved - position := 2 * 8 + position := 6 // parse reference index box.DataReferenceIndex = binary.BigEndian.Uint16(content[position : position+2]) - log.Println("data reference index", box.DataReferenceIndex) // TODO: remove me + position += 2 - return box + return box, position +} + +// VisualSampleEntry visual sample entry struct +// +// 12.1.3 Sample entry +// +// Video tracks use VisualSampleEntry. +// +// In video tracks, the frame_count field must be 1 unless the specification for the media format explicitly +// documents this template field and permits larger values. That specification must document both how +// the individual frames of video are found (their size information) and their timing established. That +// timing might be as simple as dividing the sample duration by the frame count to establish the frame +// duration. +type VisualSampleEntry struct { + *SampleEntry + Width uint16 + Height uint16 + // give the resolution of the image in pixels‐per‐inch, as a fixed 16.16 number + HorizResolution Fixed1616 + // give the resolution of the image in pixels‐per‐inch, as a fixed 16.16 number + VertResolution Fixed1616 + // indicates how many frames of compressed video are stored in each sample. The + // default is 1, for one frame per sample; it may be more than 1 for multiple frames per sample + FrameCount uint16 + // is a name, for informative purposes. It is formatted in a fixed 32‐byte field, with + //the first byte set to the number of bytes to be displayed, followed by that number of bytes of + //displayable data, and then padding to complete 32 bytes total (including the size byte). The field + //may be set to 0. + CompressorName string + Depth uint16 +} + +func ParseVisualSampleEntry(filePosition uint64, headerSize uint32, content []byte) (*VisualSampleEntry, int) { + sampleEntry, position := ParseSampleEntry(filePosition, headerSize, content) + box := &VisualSampleEntry{ + SampleEntry: sampleEntry, + } + + // skip pre-defined and reserved bytes + position += 2 + 2 + 4*3 + + // parse resolution + box.Width = binary.BigEndian.Uint16(content[position : position+2]) + box.Height = binary.BigEndian.Uint16(content[position+2 : position+4]) + box.HorizResolution = NewFixed1616ByBytes(content[position+4 : position+8]) + box.VertResolution = NewFixed1616ByBytes(content[position+8 : position+12]) + position += 12 + + // skip reserved bytes + position += 4 + + // parse frame count + box.FrameCount = binary.BigEndian.Uint16(content[position : position+2]) + position += 2 + + // parse compressor name length (32bytes total) + compressorNameLength := int(content[position]) + box.CompressorName = string(content[position+1 : position+1+compressorNameLength]) + position += 32 + + // parse depth + box.Depth = binary.BigEndian.Uint16(content[position : position+2]) + position += 2 + + // skip pre-defined bytes + position += 2 + + return box, position }