Archiving Binaries on Github

Posted by : on

Category : powershell   scripts   archive


ORIGIN

I wanted to archive a big ZIP file on Github. But as you can imagine, github prevents the usage of repositories to archive sizeable binaries like movies, audio files, zip files, etc…

Concept

I created a script that changes a given binary file so that it is split in files on a maximum size and with the content being text, that is, converted to Base64.


SPLITTING BINARY FILE


function Invoke-SplitDataFile {
    [CmdletBinding(SupportsShouldProcess)]
    param(
        [Parameter(Mandatory = $true)]
        [string]$Path,
        [Parameter(Mandatory = $false)]
        [int64]$Newsize = 1MB,
        [Parameter(Mandatory = $false)]
        [string]$OutPath,
        [Parameter(Mandatory = $false)]
        [string]$Extension = "cpp",
        [Parameter(Mandatory = $false)]
        [switch]$AsString
    )

    if ($Newsize -le 0)
    {
        Write-Error "Only positive sizes allowed"
        return
    }

    $FileSize = (Get-Item $Path).Length
    $SyncStopWatch = [System.Diagnostics.Stopwatch]::StartNew()
    $Script:ProgressTitle = "Split Files"
    $TotalTicks = 0
    $Count = [math]::Round($FileSize / $Newsize)
    $Script:StepNumber = 1
    $Script:TotalSteps = $Count + 3
    if ($PSBoundParameters.ContainsKey('OutPath') -eq $False) {
        $OutPath = [IO.Path]::GetDirectoryName($Path)

        Write-Verbose "Using OutPath from Path $Path"
    } else {
        Write-Verbose "Using OutPath $OutPath"
    }
    $OutPath = $OutPath.TrimEnd('\')

    if (-not (Test-Path -Path "$OutPath")) {
        Write-Verbose "CREATING $OutPath"
        $Null = New-Item $OutPath -ItemType Directory -Force -ErrorAction Ignore
    }

    $FILENAME = [IO.Path]::GetFileNameWithoutExtension($Path)


    $MAXVALUE = 1GB # Hard maximum limit for Byte array for 64-Bit .Net 4 = [INT32]::MaxValue - 56, see here https://stackoverflow.com/questions/3944320/maximum-length-of-byte
    # but only around 1.5 GB in 32-Bit environment! So I chose 1 GB just to be safe
    $PASSES = [math]::Floor($Newsize / $MAXVALUE)
    $REMAINDER = $Newsize % $MAXVALUE
    if ($PASSES -gt 0) { $BUFSIZE = $MAXVALUE } else { $BUFSIZE = $REMAINDER }

    $OBJREADER = New-Object System.IO.BinaryReader ([System.IO.File]::Open($Path, 'Open', 'Read', 'Read')) # for reading)
    [Byte[]]$BUFFER = New-Object Byte[] $BUFSIZE
    $NUMFILE = 1

    do {
        $Extension = $Extension.TrimStart('.')
        $NEWNAME = "{0}\{1}{2:d4}.{3}" -f $OutPath, $FILENAME, $NUMFILE, $Extension
        $Script:ProgressMessage = "Split {0} of {1} files" -f $Script:StepNumber, $Script:TotalSteps
        Invoke-AutoUpdateProgress_FileUtils
        $Script:StepNumber++
        $COUNT = 0
        $OBJWRITER = $NULL
        [int32]$BYTESREAD = 0
        while (($COUNT -lt $PASSES) -and (($BYTESREAD = $OBJREADER.Read($BUFFER, 0, $BUFFER.Length)) -gt 0))
        {
            Write-Verbose "[Invoke-SplitDataFile] Reading $BYTESREAD bytes"
            if ($AsString) {
                $DataString = [convert]::ToBase64String($BUFFER, 0, $BYTESREAD)
                Write-Verbose "[Invoke-SplitDataFile] WRITING DataString to $NEWNAME"
                Set-Content $NEWNAME $DataString
            } else {
                if (!$OBJWRITER)
                {
                    $OBJWRITER = New-Object System.IO.BinaryWriter ([System.IO.File]::Create($NEWNAME))
                    Write-Verbose " + CREATING $NEWNAME"
                }
                Write-Verbose "[Invoke-SplitDataFile] WRITING $BYTESREAD bytes to $NEWNAME"
                $OBJWRITER.Write($BUFFER, 0, $BYTESREAD)
            }
            $COUNT++
        }
        if (($REMAINDER -gt 0) -and (($BYTESREAD = $OBJREADER.Read($BUFFER, 0, $REMAINDER)) -gt 0))
        {
            Write-Verbose "[Invoke-SplitDataFile] Reading $BYTESREAD bytes"
            if ($AsString) {
                $DataString = [convert]::ToBase64String($BUFFER, 0, $BYTESREAD)
                Write-Verbose "[Invoke-SplitDataFile] WRITING DataString to $NEWNAME"
                Set-Content $NEWNAME $DataString
            } else {
                if (!$OBJWRITER)
                {
                    $OBJWRITER = New-Object System.IO.BinaryWriter ([System.IO.File]::Create($NEWNAME))
                    Write-Verbose " + CREATING $NEWNAME"
                }
                Write-Verbose "[Invoke-SplitDataFile] WRITING $BYTESREAD bytes to $NEWNAME"
                $OBJWRITER.Write($BUFFER, 0, $BYTESREAD)
            }
        }

        if ($OBJWRITER) { $OBJWRITER.Close() }
        if ($BYTESREAD) {
            Write-FileHeader -Path $NEWNAME -PartId $NUMFILE
        }

        ++ $NUMFILE
    } while ($BYTESREAD -gt 0)

    $OBJREADER.Close()
}




RE-COMBINING BINARY FILE from TEXT FILES


function Sort-Lexically {
    [CmdletBinding()]
    param(
        [Parameter(Position = 0, ValueFromPipeline = $true, Mandatory = $true)]
        [object]$InputObject,

        [Parameter(Mandatory = $false)]
        [string]$Property
    )

    begin {
        $items = @()
    }

    process {
        $items += $InputObject
    }

    end {
        $items | Sort-Object {
            $name = if ($Property) { $_.$Property } else { $_ }

            if ($name -match '(\d+)(?=\D*$)') {
                [int]$matches[1] # numeric suffix before non-digits at the end (e.g., .cpp)
            } else {
                $name
            }
        }
    }
}

function Sort-ByFileHeaderId {
    [CmdletBinding()]
    param(
        [Parameter(Position = 0, ValueFromPipeline = $true, Mandatory = $true)]
        [string[]]$Path
    )

    begin {
        $items = @()
    }

    process {
        $items += $Path
    }

    end {
        $items |
        ForEach-Object {
            try {
                $header = Read-FileHeader -Path $_
                [pscustomobject]@{
                    Path = $_
                    PartID = $header.PartID
                }
            } catch {
                Write-Warning "Skipping invalid or corrupt header: $_"
            }
        } |
        Sort-Object PartID |
        Select-Object -ExpandProperty Path
    }
}


function Invoke-SplitDataFile {
    [CmdletBinding(SupportsShouldProcess)]
    param(
        [Parameter(Mandatory = $true)]
        [string]$Path,
        [Parameter(Mandatory = $false)]
        [int64]$Newsize = 1MB,
        [Parameter(Mandatory = $false)]
        [string]$OutPath,
        [Parameter(Mandatory = $false)]
        [string]$Extension = "cpp",
        [Parameter(Mandatory = $false)]
        [switch]$AsString
    )

    if ($Newsize -le 0)
    {
        Write-Error "Only positive sizes allowed"
        return
    }

    $FileSize = (Get-Item $Path).Length
    $SyncStopWatch = [System.Diagnostics.Stopwatch]::StartNew()
    $Script:ProgressTitle = "Split Files"
    $TotalTicks = 0
    $Count = [math]::Round($FileSize / $Newsize)
    $Script:StepNumber = 1
    $Script:TotalSteps = $Count + 3
    if ($PSBoundParameters.ContainsKey('OutPath') -eq $False) {
        $OutPath = [IO.Path]::GetDirectoryName($Path)

        Write-Verbose "Using OutPath from Path $Path"
    } else {
        Write-Verbose "Using OutPath $OutPath"
    }
    $OutPath = $OutPath.TrimEnd('\')

    if (-not (Test-Path -Path "$OutPath")) {
        Write-Verbose "CREATING $OutPath"
        $Null = New-Item $OutPath -ItemType Directory -Force -ErrorAction Ignore
    }

    $FILENAME = [IO.Path]::GetFileNameWithoutExtension($Path)


    $MAXVALUE = 1GB # Hard maximum limit for Byte array for 64-Bit .Net 4 = [INT32]::MaxValue - 56, see here https://stackoverflow.com/questions/3944320/maximum-length-of-byte
    # but only around 1.5 GB in 32-Bit environment! So I chose 1 GB just to be safe
    $PASSES = [math]::Floor($Newsize / $MAXVALUE)
    $REMAINDER = $Newsize % $MAXVALUE
    if ($PASSES -gt 0) { $BUFSIZE = $MAXVALUE } else { $BUFSIZE = $REMAINDER }

    $OBJREADER = New-Object System.IO.BinaryReader ([System.IO.File]::Open($Path, 'Open', 'Read', 'Read')) # for reading)
    [Byte[]]$BUFFER = New-Object Byte[] $BUFSIZE
    $NUMFILE = 1

    do {
        $Extension = $Extension.TrimStart('.')
        $NEWNAME = "{0}\{1}{2:d4}.{3}" -f $OutPath, $FILENAME, $NUMFILE, $Extension
        $Script:ProgressMessage = "Split {0} of {1} files" -f $Script:StepNumber, $Script:TotalSteps
        Invoke-AutoUpdateProgress_FileUtils
        $Script:StepNumber++
        $COUNT = 0
        $OBJWRITER = $NULL
        [int32]$BYTESREAD = 0
        while (($COUNT -lt $PASSES) -and (($BYTESREAD = $OBJREADER.Read($BUFFER, 0, $BUFFER.Length)) -gt 0))
        {
            Write-Verbose "[Invoke-SplitDataFile] Reading $BYTESREAD bytes"
            if ($AsString) {
                $DataString = [convert]::ToBase64String($BUFFER, 0, $BYTESREAD)
                Write-Verbose "[Invoke-SplitDataFile] WRITING DataString to $NEWNAME"
                Set-Content $NEWNAME $DataString
            } else {
                if (!$OBJWRITER)
                {
                    $OBJWRITER = New-Object System.IO.BinaryWriter ([System.IO.File]::Create($NEWNAME))
                    Write-Verbose " + CREATING $NEWNAME"
                }
                Write-Verbose "[Invoke-SplitDataFile] WRITING $BYTESREAD bytes to $NEWNAME"
                $OBJWRITER.Write($BUFFER, 0, $BYTESREAD)
            }
            $COUNT++
        }
        if (($REMAINDER -gt 0) -and (($BYTESREAD = $OBJREADER.Read($BUFFER, 0, $REMAINDER)) -gt 0))
        {
            Write-Verbose "[Invoke-SplitDataFile] Reading $BYTESREAD bytes"
            if ($AsString) {
                $DataString = [convert]::ToBase64String($BUFFER, 0, $BYTESREAD)
                Write-Verbose "[Invoke-SplitDataFile] WRITING DataString to $NEWNAME"
                Set-Content $NEWNAME $DataString
            } else {
                if (!$OBJWRITER)
                {
                    $OBJWRITER = New-Object System.IO.BinaryWriter ([System.IO.File]::Create($NEWNAME))
                    Write-Verbose " + CREATING $NEWNAME"
                }
                Write-Verbose "[Invoke-SplitDataFile] WRITING $BYTESREAD bytes to $NEWNAME"
                $OBJWRITER.Write($BUFFER, 0, $BYTESREAD)
            }
        }

        if ($OBJWRITER) { $OBJWRITER.Close() }
        if ($BYTESREAD) {
            Write-FileHeader -Path $NEWNAME -PartId $NUMFILE
        }

        ++ $NUMFILE
    } while ($BYTESREAD -gt 0)

    $OBJREADER.Close()
}



function Invoke-CombineSplitFiles {

    [CmdletBinding(SupportsShouldProcess = $true)]
    param(
        [Parameter(Position = 0, Mandatory = $true, HelpMessage = "Path to the folder containing split parts")]
        [string]$Path,
        [Parameter(Position = 1, Mandatory = $true, HelpMessage = "Path of the recombined file")]
        [string]$Destination,
        [Parameter(Mandatory = $false, HelpMessage = "Encoding type")]
        [ValidateSet('base64','raw')]
        [string]$Type='base64'
    )
    [bool]$EncodedAsString = ($Type -eq 'base64')

    $SyncStopWatch = [System.Diagnostics.Stopwatch]::StartNew()
    $Script:ProgressTitle = "Combine Split Files"
    $TotalTicks = 0
    $Basename = ''

    $Path = $Path.TrimEnd('\')
    Write-Verbose "Path is $Path"

    $Files = (Get-ChildItem $Path -File -Filter "$Basename*.cpp").FullName
    try{
       $SortedFiles = $Files | Sort-ByFileHeaderId
    }catch{
        Write-Error "$_"
    }
    $FilesCount = $SortedFiles.Count
    $Script:TotalSteps = $FilesCount
    $Script:StepNumber = 1

    if(![System.IO.File]::Exists("$Destination")){
        New-Item -Path "$Destination" -ItemType File -FOrce -ErrorAction Ignore | Out-Null
    }

    # Open file stream for output
    $FileStream = [System.IO.File]::Open($Destination, 'Create', 'Write', 'Write') # for writing

    [bool]$RecombinedSuccessfully = $True

    try {
        foreach ($f in $SortedFiles) {
            if (-not (Test-Path -Path $f)) {
                throw "missing file: $f"
            }

            $HeaderData = Remove-FileHeader -Path $f

            if ($EncodedAsString) {
                [string]$Base64String = Get-Content -LiteralPath $f -Raw
                [byte[]]$ReadBytes = [Convert]::FromBase64String($Base64String)
                $FileStream.Write($ReadBytes, 0, $ReadBytes.Length)
            } else {
                [byte[]]$ReadBytes = Get-Content -LiteralPath $f -Raw -AsByteStream
                $FileStream.Write($ReadBytes, 0, $ReadBytes.Length)
            }

            $Script:ProgressMessage = "Wrote part $Script:StepNumber of $Script:TotalSteps"
            Invoke-AutoUpdateProgress_FileUtils
        }
    }catch{
        $RecombinedSuccessfully = $False
        Write-Error "Error on $f . $_"
    }finally {
        $FileStream.Close()
    }

    if($RecombinedSuccessfully){
        Write-Host "Recombined Successfully ! Wrote combined file to $Destination"
    }
  
}




Get the code

GitHub

Important Note Do You have Issues accessing the core repository? Don’t be shy and send me an EMAIL at guillaumeplante.qc@gmail.com and I will fix access for you


About Guillaume Plante
Guillaume Plante

A developper with a passion for technology, music, astronomy and art. Coding range: hardware/drivers, security, ai,. c/c++, powershell

Email : guillaumeplante.qc@gmail.com

Website : https://arsscriptum.ddns.net

Useful Links