#!/usr/bin/env perl
use strict;
use warnings;
use Git;
use Git::FastExport;
use File::Spec::Functions qw( rel2abs );

our $VERSION = 0.03;

my %repo;

# process command-line parameters
while (@ARGV) {
    my ( $repo, $dir ) = split /:/, shift @ARGV, 2;
    $repo = rel2abs( $repo );
    $dir ||= '';

    # create an export parser for each repo
    my $parser
        = Git::FastExport->new( Git->repository( Directory => $repo ) );
    $parser->fast_export(qw( --progress=1 --all --date-order ));
    $parser->{mapdir} = $dir;

    # update the %repo hash
    $repo                = $parser->{source};
    $repo{$repo}{repo}   = $repo;
    $repo{$repo}{dir}    = $dir;
    $repo{$repo}{parser} = $parser;
}

# repositories that we will process
my @repos = values %repo;

my $mark = 1_000_000;    # mark counter in the new rpo
my %mark_map;            # map marks in source repos to marks in the new repo

# get the first commits
$_->{commit} = next_commit( $_->{parser} ) for @repos;

# main loop
use Data::Dumper;
my $last;
my %commits;
while (@repos) {

    # sort by date
    @repos = sort { $a->{commit}{date} <=> $b->{commit}{date} } @repos;
    my $repo = $repos[0];

    # next commit to dump
    my $commit = $repo->{commit};

    # update marks & dir in files
    for ( @{ $commit->{files} } ) {
        s/^M (\d+) :(\d+)/M $1 :$mark_map{$repo->{repo}}{$2}/;
        if ( my $dir = $repo->{dir} ) {
            s!^(M \d+ :\d+) (.*)!$1 $dir/$2!;    # filemodify
            s!^D (.*)!D $dir/$1!;                # filedelete

            # /!\ quotes may happen - die and fix if needed
            die "Choked on quoted paths in $repo->{repo}! Culprit:\n$_\n"
                if /^[CR] \S+ \S+ /;

            # filecopy | filerename
            s!^([CR]) (\S+) (\S+)!$1 $dir/$2 $dir/$3!;
        }
    }

    # first commit in the old repo linked to latest commit in new repo
    if ( $last && !$commit->{from} ) {
        $commit->{from} = ["from :$last"];
    }

    # update historical information
    my ($id) = $commit->{mark}[0] =~ /:(\d+)/g;
    $last = $id;    # last commit applied
    my $branch = ( split / /, $commit->{header} )[1];
    my $node = $commits{$id} = {
        name     => $id,
        repo     => $repo->{repo},
        branch   => $branch,
        children => [],
        merge    => exists $commit->{merge},
    };

    # this commit's parents
    my @parents = map {/:(\d+)/g} @{ $commit->{from} || [] },
        @{ $commit->{merge} || [] };

    # map each parent to its last "alien" commit
    my %parent_map = map {
        $_ => last_alien_child( $commits{$_}, $repo->{repo}, $branch )->{name}
    } @parents;

    # map parent marks
    for ( @{ $commit->{from} || [] }, @{ $commit->{merge} || [] } ) {
        if (m/^(from|merge) /) {
            s/:(\d+)/:$parent_map{$1}/g;
        }
    }

    # update the parents information
    for my $parent ( map { $parent_map{$_} } @parents ) {
        push @{ $commits{$parent}{children} }, $node->{name};
    }

    # dump the commit
    print $commit->as_string;

    # load next commit
    $repo->{commit} = next_commit( $repo->{parser} )
        or shift @repos;    # no more blocks in this export
}

# return the next commit
# - print out the intermediate blocks
# - offset the old marks
sub next_commit {
    my ($parser) = @_;
    my $block;

    while ( $block = $parser->next_block() ) {

        # map to the new mark
        for ( @{ $block->{mark} || [] } ) {
            s/:(\d+)/:$mark/
                and $mark_map{ $parser->{source} }{$1} = $mark++;
        }

        # update marks in from & merge
        for ( @{ $block->{from} || [] }, @{ $block->{merge} || [] } ) {
            if (m/^(from|merge) /) {
                s/:(\d+)/:$mark_map{$parser->{source}}{$1}/g;
            }
        }
        last if $block->{type} eq 'commit';
        print $block->as_string();
    }
    return $block;
}

# find the last child of this node
# that has either no child
# or a child in our repo
sub last_alien_child {
    my ( $node, $repo, $branch ) = @_;

    while (1) {

        # no children nodes
        return $node if ( !@{ $node->{children} } );

        # some children nodes are local
        return $node
            if grep { $commits{$_}->{repo} eq $repo } @{ $node->{children} };

        # there's a child in the same branch
        if ( my ($peer)
            = grep { $commits{$_}->{branch} eq $branch }
            @{ $node->{children} } )
        {

            # but don't go past another repo's merges
            return $node if $commits{$peer}->{merge};
            $node = $commits{$peer};
        }

        # or pick the first child (as good as any)
        else {
            $node = $commits{ $node->{children}[0] };
        }
    }
}

__END__

=head1 NAME

git-stitch-repo - Stitch several git repositories into a git-fast-import stream

=head1 SYNOPSIS

git-stitch-repo repo1 repo2:dir2 ...

=head1 DESCRIPTION

B<git-stitch-repo> will process the output of C<git-fast-export --all
--date-order> on the git repositories given on the command-line,
and create a stream suitable for B<git-fast-import> that will create
a new repository containing all the commits in a new commit tree
that respects the history of all the source repositories.

=head2 Example

Imagine we have two repositories A and B that we want to stitch into
a repository C so that all the files from A are in subdirectory F<A>
and all the files from B are in subdirectory F<B>.

Repository A:

           topic
           '     ,master
           A3---A5
          /    /
    A1---A2---A4

Branch I<master> points to A5 and branch I<topic> points to A3.

Repository B:

                 ,topic    ,master
           B3---B5---B7---B8
          /         /
    B1---B2---B4---B6

Branch I<master> points to B8 and branch I<topic> points to B5.

The C repository should preserve chronology, commit relationships and
branches as much as possible, while giving the impression that the
directories F<A> & F<B> did live side-by-side all the time.

Assuming additional timestamps not shown on the above graphs,
B<git-stitch-repo> will produce a B<git-fast-import> stream that will
create the following history:

                                    ,topic    ,master
                    A3---B3---A5---B5---B7---B8
                   /         /         /
   A1---B1---A2---B2---A4---B4--------B6


Note that the current result is slightly buggy, since A5 wasn't on the
I<topic> branch in the original graph for A.

=head1 AUTHOR

Philippe Bruhat (BooK), C<< <book@cpan.org> >>.

=head1 ACKNOWLEDGEMENTS

The original version of this script was created as part of my work
for BOOKING.COM, which authorized its publication/distribution
under the same terms as Perl itself.

=head1 COPYRIGHT

Copyright 2008 Philippe Bruhat (BooK), All Rights Reserved.

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

