forked from newren/git-filter-repo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert-svnexternals
587 lines (487 loc) · 20.5 KB
/
convert-svnexternals
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
#!/usr/bin/env python3
"""
This is a program that will insert Git submodules according to SVN externals
definitions (svn:externals properties) from the original Subversion repository
throughout the history.
Information about the externals is obtained from the ".gitsvnextmodules" file
created during SVN-to-Git conversion by SubGit (https://subgit.com/). For that
file to exist, SubGit's config option "translate.externals=true" must have been
set during the conversion.
Actual modifications:
- Insert gitlinks (mode 160000) into the tree.
- Add .gitmodules file with relevant sections.
- Remove sections converted to submodules from .gitsvnextmodules file
and delete it if empty.
.gitsvnextmodules example:
[submodule "somedir/extdir"]
path = somedir/extdir
owner = somedir
url = https://svn.example.com/somesvnrepo/trunk
revision = 1234
branch = /
fetch = :refs/remotes/git-svn
remote = svn
type = dir
Resulting addition in "somedir" tree (cat-file pretty-print format):
160000 commit 1234123412341234123412341234123412341234 extdir
Resulting .gitmodules entry:
[submodule "somedir/extdir"]
path = somedir/extdir
url = https://git.example.com/somegitrepo.git
SVN-to-Git mapping file:
Can be created from SubGit's "refs/svn/map".
One line per mapping in following format:
<svn url> TAB <svn rev> TAB <git url> TAB <git commit> TAB <state>
- Leading '#' can be used for comments.
- <svn url> must not contain a trailing slash.
- <state> has to be "commit" to be usable, but can be "missing" if <git commit>
does not exist in the repository anymore. Adopted from git-cat-file output.
Example:
https://svn.example.com/somesvnrepo/trunk 1234 https://git.example.com/somegitrepo.git 1234123412341234123412341234123412341234 commit
Features:
- Repeatedly added/removed externals will be handled properly.
- Externals replaced by directly added files and vice versa will be handled
properly.
Caveats:
- This script must NOT be run repeatedly. A second invocation would lead to a
different result in case the externals could only be converted partially.
- Inconsistent SVN repositories (with failing checkout) not handled, i.e.
- normal directory and external with the same path
- external path not existing for the given revision
- No attention was paid to non-ASCII and special characters in gitlink paths,
might cause problems.
- There is no error handling for mandatory options missing in .gitsvnextmodules
file. The script would crash in case of such buggy files, but that shouldn't
happen in practice.
TODO:
- Add external files directly.
- Alternatively add external directories directly instead of using a submodule.
"""
"""
Please see the
***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""
import argparse
import os
import sys
import shutil
import subprocess
import configparser
from urllib.parse import urlsplit
try:
import git_filter_repo as fr
except ImportError:
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
svn_root_url = ""
svn_git_mappings = []
def parse_args(argv=None):
    """
    Parse and return arguments for this script.
    Also do some argument sanity checks and adaptions.

    argv: optional list of argument strings. When None (the default),
          argparse reads the arguments from sys.argv[1:].

    Returns the argparse namespace.
    Raises SystemExit if --analyze and --svn-git-mapfiles are combined, or if
    neither of them is given.
    """
    parser = argparse.ArgumentParser(
        description="Add Git submodules according to svn:externals from .gitsvnextmodules. "
                    "As preparation for this conversion process, an analysis can be performed.")

    parser.add_argument('--force', '-f', action='store_true',
        help="Rewrite repository history even if the current repo does not "
             "look like a fresh clone.")
    parser.add_argument('--refs', nargs='+',
        help="Limit history rewriting to the specified refs. Option is directly "
             "forwarded to git-filter-repo, see there for details and caveats. "
             "Use for debugging purposes only!")
    parser.add_argument('--svn-root-url',
        help="Root URL of the corresponding SVN repository, "
             "needed for conversion of relative to absolute external URLs.")

    analysis = parser.add_argument_group(title="Analysis")
    analysis.add_argument('--analyze', action='store_true',
        help="Analyze repository history and create auxiliary files for conversion process.")
    # Paths are kept as bytes (os.fsencode), matching the bytes paths used
    # with os.path.join elsewhere in this script.
    analysis.add_argument('--report-dir', type=os.fsencode,
        help="Directory to write report, defaults to GIT_DIR/filter-repo/svnexternals, "
             "refuses to run if exists, --force delete existing dir first.")

    conversion = parser.add_argument_group(title="Conversion")
    conversion.add_argument('--svn-git-mapfiles', type=os.fsencode, nargs='+', metavar='MAPFILE',
        help="Files with SVN-to-Git revision mappings for SVN externals conversion.")

    args = parser.parse_args(argv)

    # Analysis and conversion are mutually exclusive modes; the mapping files
    # are what selects (and is required for) the conversion mode.
    if args.analyze and args.svn_git_mapfiles:
        raise SystemExit("Error: --svn-git-mapfiles makes no sense with --analyze.")

    if not args.analyze and not args.svn_git_mapfiles:
        raise SystemExit("Error: --svn-git-mapfiles is required for the conversion process.")

    return args
def read_mappings(mapfiles):
    """
    Read files with SVN-to-Git mappings and return a list of mappings from it.

    mapfiles: iterable of paths to mapping files. Each non-blank line not
              starting with '#' must consist of the TAB-separated fields
              <svn url>, <svn rev>, <git url>, <git commit>, <state>.

    Returns a list of dicts with the keys 'svn_url', 'svn_rev' (int),
    'git_url', 'git_commit' and 'state', in file order.
    Raises SystemExit naming the file and line number if a line does not have
    five fields or its revision is not an integer.
    """
    mappings = []
    for mapfile in mapfiles:
        with open(mapfile, "rb") as f:
            for lineno, line in enumerate(f, start=1):
                line = line.rstrip(b'\r\n')

                # Skip blank and comment lines
                if not line or line.startswith(b'#'):
                    continue

                # Convert to string for use with configparser later
                fields = line.decode().split('\t', 4)

                # Both a wrong field count and a non-numeric revision raise
                # ValueError here; report them with their location instead of
                # crashing with a bare traceback.
                try:
                    svn_url, svn_rev, git_url, git_commit, state = fields
                    svn_rev = int(svn_rev)
                except ValueError:
                    raise SystemExit(
                        "Error: Malformed mapping in \"%s\", line %d: expected "
                        "<svn url> TAB <svn rev> TAB <git url> TAB <git commit> TAB <state>"
                        % (os.fsdecode(mapfile), lineno)) from None

                # URLs are looked up with their trailing slashes removed, so
                # an entry keeping one could never match.
                mappings.append({'svn_url': svn_url.rstrip('/'),
                                 'svn_rev': svn_rev,
                                 'git_url': git_url,
                                 'git_commit': git_commit,
                                 'state': state})
    return mappings
# Handle of the "git cat-file --batch" child process used by parse_config().
# It is None until the script part of this file starts the process.
cat_file_process = None


def parse_config(blob_id):
    """
    Create a configparser object for a .gitsvnextmodules/.gitmodules file from
    its blob ID.

    blob_id: bytes object name that is written, followed by a newline, to the
             stdin of cat_file_process; None yields an empty parser.

    Returns a configparser.ConfigParser with interpolation disabled.
    Raises SystemExit if the reply header does not consist of the three fields
    <hash> <type> <size>.
    """
    # Interpolation is disabled because the values are URLs and paths, which
    # may legitimately contain '%' (e.g. "%20"). With the default
    # BasicInterpolation, reading such a value raises an exception.
    parsed_config = configparser.ConfigParser(interpolation=None)
    if blob_id is None:
        return parsed_config

    # Request the blob contents
    cat_file_process.stdin.write(blob_id + b'\n')
    cat_file_process.stdin.flush()

    # Reply header: "<hash> <type> <size>"
    header = cat_file_process.stdout.readline().split()
    if len(header) != 3:
        raise SystemExit("Error: Unexpected reply from git cat-file for object %s: %s"
                         % (blob_id.decode(), b' '.join(header).decode()))
    objsize = int(header[2])

    # The contents are followed by one extra newline, which has to be consumed
    # as well to keep the stream in sync for the next request.
    contents_plus_newline = cat_file_process.stdout.read(objsize + 1)

    # Parse it
    parsed_config.read_string(contents_plus_newline.decode())
    return parsed_config
def create_blob(parsed_config):
    """
    Create a filter-repo blob object from a .gitsvnextmodules/.gitmodules
    configparser object according to Git config style.

    parsed_config: configparser object whose sections and options are
                   serialized as "[section]" header lines followed by
                   TAB-indented "option = value" lines.

    Returns an fr.Blob holding the encoded text.
    """
    lines = []
    for sec in parsed_config.sections():
        lines.append("[" + sec + "]\n")
        for opt in parsed_config.options(sec):
            # Read the value raw: it is written out verbatim, and URLs/paths
            # containing '%' must not be run through configparser's
            # interpolation (which would raise on e.g. "%20").
            value = parsed_config.get(sec, opt, raw=True)
            lines.append("\t" + opt + " = " + value + "\n")
    return fr.Blob(''.join(lines).encode())
def get_git_url(svn_url):
    """
    Get the Git URL for a corresponding SVN URL.

    svn_url: SVN URL compared for equality with the 'svn_url' of each entry
             in the global svn_git_mappings list.

    Returns the 'git_url' of the first matching entry, or None if no entry
    matches.
    """
    for entry in svn_git_mappings:
        if entry['svn_url'] == svn_url:
            return entry['git_url']
    # No "else" on the loop: it never breaks, so falling through already
    # means that nothing matched.
    return None
def get_git_commit_hash(svn_url, svn_rev):
    """
    Look up the Git commit hash belonging to an SVN URL+revision.

    The lookup is not limited to the exact revision: the mapping with the
    highest revision not above svn_rev is used. This is needed when the
    external's revision refers to the root URL rather than to the specific
    subdirectory (e.g. trunk), which is what TortoiseSVN does when an
    external is set to HEAD.

    Returns the 'git_commit' of the selected mapping if its state is
    "commit"; returns None if no mapping qualifies or the selected one has
    another state.
    """
    # Only positive revisions up to the requested one are eligible.
    candidates = [mapping for mapping in svn_git_mappings
                  if mapping['svn_url'] == svn_url
                  and 0 < mapping['svn_rev'] <= svn_rev]
    if not candidates:
        return None

    # max() keeps the first of several entries sharing the highest revision.
    best = max(candidates, key=lambda mapping: mapping['svn_rev'])
    if best['state'] != "commit":
        return None
    return best['git_commit']
def get_absolute_svn_url(svnext_url, svn_root_url):
    """
    Turn a relative svn:externals URL into an absolute one.

    Returns a (success, url) tuple:
    - "../" and "^/../" formats are unsupported: (False, url unchanged).
    - Without a root URL, or for an URL that is absolute already:
      (True, url unchanged).
    - "^/" is resolved against the root URL, "//" against the root URL's
      scheme, "/" against the root URL's scheme and host.
    Trailing slashes are removed from the returned URL in every case.
    """
    external = svnext_url.rstrip("/")
    root = svn_root_url.rstrip("/")
    root_parts = urlsplit(root)

    if external.startswith("../") or external.startswith("^/../"):
        # Unsupported relative format
        return (False, external)

    if not root:
        # Nothing to resolve against
        return (True, external)

    if external.startswith("^/"):
        # Relative to the repository root
        return (True, root + external[1:])

    if external.startswith("//"):
        # Relative to the scheme
        return (True, root_parts.scheme + ":" + external)

    if external.startswith("/"):
        # Relative to the server root
        return (True, root_parts.scheme + "://" + root_parts.netloc + external)

    # Absolute already
    return (True, external)
def add_submodule_tree_entry(commit, parsed_config, section):
    """
    Append a gitlink (mode 160000) file change for one external to a commit.

    The external is described by the given section of a parsed
    .gitsvnextmodules file.

    Returns True if the gitlink was appended to commit.file_changes, False if
    the external was skipped (not of type "dir", unsupported URL format, no
    revision given, or no usable SVN-to-Git mapping).
    """
    external = parsed_config[section]

    # SVN file externals (type=file) cannot become submodules
    if external['type'] != 'dir':
        return False

    # Unsupported URL formats cannot be mapped
    supported, absolute_url = get_absolute_svn_url(external['url'], svn_root_url)
    if not supported:
        return False

    # TODO: without a revision it would have to be guessed according to the
    # commit timestamp, skip for now
    if not parsed_config.has_option(section, 'revision'):
        return False
    revision = int(external['revision'])

    # Map SVN url+revision to a Git commit, skip if missing or unusable
    commit_hash = get_git_commit_hash(absolute_url, revision)
    if commit_hash is None:
        return False

    # Add the gitlink to the tree
    gitlink = fr.FileChange(b'M', external['path'].encode(),
                            commit_hash.encode(), b'160000')
    commit.file_changes.append(gitlink)
    return True
def get_commit_map_path():
    """
    Return the path "filter-repo/commit-map" below the Git directory that
    fr.GitUtils.determine_git_dir() reports for the current directory.
    """
    return os.path.join(fr.GitUtils.determine_git_dir(b'.'),
                        b'filter-repo',
                        b'commit-map')
def parse_commit_map(commit_map_file):
    """
    Read a commit-map file into a dictionary.

    Every non-blank line must consist of two whitespace-separated fields; the
    first becomes the key, the second the value (both bytes). The "old"/"new"
    header in the first line is stored like any other pair.
    """
    mapping = {}
    with open(commit_map_file, "rb") as stream:
        for raw in stream:
            entry = raw.rstrip(b'\r\n')
            if entry:
                original_id, rewritten_id = entry.split()
                mapping[original_id] = rewritten_id
    return mapping
def merge_commit_maps(old_commit_map, new_commit_map):
    """
    Chain two commit-maps, skipping the intermediate commits.

    Each key of old_commit_map is mapped to what new_commit_map says about its
    old value, or to that old value itself if new_commit_map has no entry for
    it. Returns the resulting dictionary.
    """
    return {original: new_commit_map.get(intermediate, intermediate)
            for original, intermediate in old_commit_map.items()}
def write_commit_map(commit_map, commit_map_file):
    """
    Write a commit-map dictionary to a file, one "old new" pair per line with
    the first column left-aligned and padded to 40 characters.
    """
    with open(commit_map_file, 'wb') as out:
        out.writelines(b'%-40s %s\n' % pair for pair in commit_map.items())
def create_report_dir(args):
    """
    Create the directory for analysis report.

    args: parsed arguments; args.report_dir (bytes path or None) selects the
          directory, args.force allows replacing an existing one.

    Without args.report_dir, "filter-repo/svnexternals" below the Git
    directory reported by fr.GitUtils.determine_git_dir() is used.
    Missing parent directories are created as well.

    Returns the path of the newly created directory.
    Exits with status 1 if the directory exists already and args.force is not
    set.
    """
    if args.report_dir:
        reportdir = args.report_dir
    else:
        git_dir = fr.GitUtils.determine_git_dir(b'.')
        reportdir = os.path.join(git_dir, b'filter-repo', b'svnexternals')

    if os.path.isdir(reportdir):
        if args.force:
            # Terminate the line, otherwise following output is glued to it
            sys.stdout.write("Warning: Removing recursively: \"%s\"\n" % fr.decode(reportdir))
            shutil.rmtree(reportdir)
        else:
            sys.stdout.write("Error: dir already exists (use --force to delete): \"%s\"\n" % fr.decode(reportdir))
            sys.exit(1)

    # makedirs instead of mkdir: also covers the intermediate "filter-repo"
    # directory and a user-supplied --report-dir with missing parents.
    os.makedirs(reportdir)
    return reportdir
# Externals URLs collected by analyze_externals(): original and absolute
# form, separately for directory and file externals.
analysis = {'dir_ext_orig': [],
            'dir_ext_abs': [],
            'file_ext_orig': [],
            'file_ext_abs': []}


def write_analysis(reportdir):
    """
    Sort the collected analysis lists and write each of them to its own file
    in the report directory, one URL per line.
    """
    dir_orig = analysis['dir_ext_orig']
    dir_abs = analysis['dir_ext_abs']
    file_orig = analysis['file_ext_orig']
    file_abs = analysis['file_ext_abs']

    reports = ((b"dir-externals-original.txt", dir_orig),
               (b"dir-externals-absolute.txt", dir_abs),
               (b"file-externals-original.txt", file_orig),
               (b"file-externals-absolute.txt", file_abs))

    for _, urls in reports:
        urls.sort()

    sys.stdout.write("Writing reports to %s..." % fr.decode(reportdir))
    sys.stdout.flush()

    for filename, urls in reports:
        with open(os.path.join(reportdir, filename), 'wb') as report:
            for url in urls:
                report.write(("%s\n" % url).encode())

    sys.stdout.write("done.\n")
def analyze_externals(commit, metadata):
    """
    Extend the global analysis with the SVN externals of one Git commit.
    Used as filter-repo commit callback.

    For every modification of .gitsvnextmodules in the commit, the URL of
    each section is recorded once in its original and once in its absolute
    form, in the lists for directory externals if the section's type is
    "dir" and in those for file externals otherwise.
    """
    for change in commit.file_changes:
        if change.filename != b'.gitsvnextmodules' or change.type != b'M':
            continue

        externals = parse_config(change.blob_id)
        for name in externals.sections():
            original_url = externals[name]['url']
            # The URL is recorded in the absolute list even if the conversion
            # was not successful, hence the success flag is not evaluated.
            _, absolute_url = get_absolute_svn_url(original_url, svn_root_url)

            if externals[name]['type'] == 'dir':
                buckets = (analysis['dir_ext_orig'], analysis['dir_ext_abs'])
            else:
                buckets = (analysis['file_ext_orig'], analysis['file_ext_abs'])

            # Keep each list free of duplicates
            for bucket, url in zip(buckets, (original_url, absolute_url)):
                if url not in bucket:
                    bucket.append(url)
def insert_submodules(commit, metadata):
    """
    Insert submodules for a Git commit.
    Used as filter-repo commit callback.

    Since .gitsvnextmodules just contains the svn:externals state for the given
    commit, we cannot derive specific changes from that file.
    So we can only add/modify the gitlinks according to .gitsvnextmodules
    (without knowing whether adding a new or modifying an existing or even
    "modifying" an unchanged submodule, but none of that really matters).
    We do not have information about deleted externals, those will be handled in
    a separate filter run afterwards.

    The .gitmodules file however will already be correct in this function because
    we don't need to know about specific changes to add, modify or delete it.

    Raises SystemExit if a gitlink could be added for an external but no Git
    URL is found for it in the mapping.
    """
    # Iterate over a snapshot because file changes are appended to
    # commit.file_changes while processing.
    for change in list(commit.file_changes):
        if change.filename != b'.gitsvnextmodules' or change.type not in (b'M', b'D'):
            continue

        gitsvnextmodules = parse_config(change.blob_id)
        # Interpolation is disabled because Git URLs may contain '%', which
        # the default interpolation rejects when the value is assigned.
        gitmodules = configparser.ConfigParser(interpolation=None)

        # Add gitlinks to the tree and prepare .gitmodules file content
        for sec in gitsvnextmodules.sections():
            if not add_submodule_tree_entry(commit, gitsvnextmodules, sec):
                continue

            # Gitlink added
            # -> Add this entry to .gitmodules as well

            # Create the section name string manually, do not rely on
            # .gitsvnextmodules to always use the proper section name.
            sec_name = 'submodule "' + gitsvnextmodules[sec]['path'] + '"'
            gitmodules[sec_name] = {}

            # submodule.<name>.path
            gitmodules[sec_name]['path'] = gitsvnextmodules[sec]['path']

            # submodule.<name>.url
            # The success flag needs no check here, an unsupported URL format
            # makes add_submodule_tree_entry() return False already.
            _, svn_url = get_absolute_svn_url(gitsvnextmodules[sec]['url'], svn_root_url)
            git_url = get_git_url(svn_url)
            if git_url is None:
                # Abort, but this will not happen in practice, caught in
                # add_submodule_tree_entry() via get_git_commit_hash() already.
                raise SystemExit("Error: No Git URL found in mapping although a commit hash could be found.")
            gitmodules[sec_name]['url'] = git_url

        # Write blob and adapt tree for .gitmodules
        if gitmodules.sections():
            # Create a blob object from the content and add it to the tree.
            blob = create_blob(gitmodules)
            filter.insert(blob)
            commit.file_changes.append(fr.FileChange(b'M', b'.gitmodules', blob.id, b'100644'))
        else:
            # Delete the file, even if a "git rm" of all submodules keeps it empty.
            commit.file_changes.append(fr.FileChange(b'D', b'.gitmodules'))
def delete_submodules(commit, metadata):
    """
    Remove submodules of deleted externals from a Git commit.
    Used as filter-repo commit callback.

    Every submodule (inserted in the previous filter run) that has no entry
    in .gitsvnextmodules belongs to an external that was really deleted,
    which could not be detected before; its tree entry is deleted here.
    The .gitmodules file needs no treatment, it is in correct state from the
    previous filter run already.

    Sections of externals that were converted to submodules are removed from
    .gitsvnextmodules; the file is deleted once no section is left.
    """
    for change in commit.file_changes:
        if not (change.filename == b'.gitsvnextmodules' and change.type in (b'M', b'D')):
            continue

        externals = parse_config(change.blob_id)

        # Walk through all directory entries of the commit's tree
        listing = subprocess.check_output('git ls-tree -d -r -z'.split() + [commit.original_id])
        for record in listing.split(b'\x00'):
            if not record:
                continue
            meta, dirname = record.split(b'\t', 1)
            mode, objtype, _objid = meta.split(b' ')
            if mode != b'160000' or objtype != b'commit':
                # Not a submodule
                continue

            # Submodule found, look for the first section describing its path
            owner = next((sec for sec in externals.sections()
                          if externals[sec]['path'].encode() == dirname),
                         None)
            if owner is None:
                # No corresponding external anymore -> delete the submodule
                commit.file_changes.append(fr.FileChange(b'D', dirname))
            elif add_submodule_tree_entry(commit, externals, owner):
                # Reinserted (might have been deleted in previous commits),
                # this external counts as converted -> drop its section
                externals.remove_section(owner)

        # Rewrite .gitsvnextmodules to contain the unhandled externals only,
        # delete it if empty (all externals converted).
        if externals.sections():
            # Replace the original blob by one with the remaining content
            blob = create_blob(externals)
            filter.insert(blob)
            change.blob_id = blob.id
        elif change.type == b'M':
            # File became empty, delete it
            commit.file_changes.append(fr.FileChange(b'D', b'.gitsvnextmodules'))
            # The change just appended would be visited by this loop as well,
            # leave it to avoid an endless loop.
            break
        # Otherwise the file was deleted already, the delete command is
        # present in the stream.
my_args = parse_args()

# Use passed URL without trailing slash(es)
if my_args.svn_root_url:
    svn_root_url = my_args.svn_root_url.rstrip("/")

# Arguments forwarded to filter-repo
extra_args = []
if my_args.force:
    extra_args = ['--force']
if my_args.refs:
    extra_args += ['--refs'] + my_args.refs

# Long-running helper process, parse_config() requests blob contents from it
cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
# try/finally: close and reap the helper process even if filtering aborts
# with an exception (the callbacks raise SystemExit on errors).
try:
    if my_args.analyze:
        # Analysis
        reportdir = create_report_dir(my_args)

        fr_args = fr.FilteringOptions.parse_args(['--dry-run']
                                                 + extra_args)
        # The name "filter" has to be kept, the commit callbacks access the
        # running filter through this global.
        filter = fr.RepoFilter(fr_args, commit_callback=analyze_externals)
        filter.run()

        write_analysis(reportdir)
    else:
        # Conversion
        svn_git_mappings = read_mappings(my_args.svn_git_mapfiles)

        # There are no references to commit hashes in commit messages because this
        # script runs on a Git repository converted from a Subversion repository.
        fr_args = fr.FilteringOptions.parse_args(['--preserve-commit-hashes',
                                                  '--preserve-commit-encoding',
                                                  '--replace-refs', 'update-no-add']
                                                 + extra_args)

        # First run: insert the submodules
        filter = fr.RepoFilter(fr_args, commit_callback=insert_submodules)
        filter.run()

        # Store commit-map after first run
        first_commit_map = parse_commit_map(get_commit_map_path())

        # Second run: delete the submodules of deleted externals
        filter = fr.RepoFilter(fr_args, commit_callback=delete_submodules)
        filter.run()

        # Update commit-map after second run, based on original IDs
        second_commit_map = parse_commit_map(get_commit_map_path())
        merged_commit_map = merge_commit_maps(first_commit_map, second_commit_map)
        write_commit_map(merged_commit_map, get_commit_map_path())
finally:
    cat_file_process.stdin.close()
    cat_file_process.wait()