76 | 76 |
def _merge_entries(path, tree1, tree2):
|
77 | 77 |
"""Merge the entries of two trees.
|
78 | 78 |
|
79 | |
:param path: A path to prepend to all tree entry names.
|
80 | |
:param tree1: The first Tree object to iterate, or None.
|
81 | |
:param tree2: The second Tree object to iterate, or None.
|
82 | |
:return: A list of pairs of TreeEntry objects for each pair of entries in
|
|
79 |
Args:
|
|
80 |
path: A path to prepend to all tree entry names.
|
|
81 |
tree1: The first Tree object to iterate, or None.
|
|
82 |
tree2: The second Tree object to iterate, or None.
|
|
83 |
Returns:
|
|
84 |
A list of pairs of TreeEntry objects for each pair of entries in
|
83 | 85 |
the trees. If an entry exists in one tree but not the other, the other
|
84 | 86 |
entry will have all attributes set to None. If neither entry's path is
|
85 | 87 |
None, they are guaranteed to match.
|
|
123 | 125 |
|
124 | 126 |
Iteration is depth-first pre-order, as in e.g. os.walk.
|
125 | 127 |
|
126 | |
:param store: An ObjectStore for looking up objects.
|
127 | |
:param tree1_id: The SHA of the first Tree object to iterate, or None.
|
128 | |
:param tree2_id: The SHA of the second Tree object to iterate, or None.
|
129 | |
:param prune_identical: If True, identical subtrees will not be walked.
|
130 | |
:return: Iterator over Pairs of TreeEntry objects for each pair of entries
|
|
128 |
Args:
|
|
129 |
store: An ObjectStore for looking up objects.
|
|
130 |
tree1_id: The SHA of the first Tree object to iterate, or None.
|
|
131 |
tree2_id: The SHA of the second Tree object to iterate, or None.
|
|
132 |
prune_identical: If True, identical subtrees will not be walked.
|
|
133 |
Returns:
|
|
134 |
Iterator over Pairs of TreeEntry objects for each pair of entries
|
131 | 135 |
in the trees and their subtrees recursively. If an entry exists in one
|
132 | 136 |
tree but not the other, the other entry will have all attributes set
|
133 | 137 |
to None. If neither entry's path is None, they are guaranteed to
|
|
163 | 167 |
change_type_same=False):
|
164 | 168 |
"""Find the differences between the contents of two trees.
|
165 | 169 |
|
166 | |
:param store: An ObjectStore for looking up objects.
|
167 | |
:param tree1_id: The SHA of the source tree.
|
168 | |
:param tree2_id: The SHA of the target tree.
|
169 | |
:param want_unchanged: If True, include TreeChanges for unmodified entries
|
|
170 |
Args:
|
|
171 |
store: An ObjectStore for looking up objects.
|
|
172 |
tree1_id: The SHA of the source tree.
|
|
173 |
tree2_id: The SHA of the target tree.
|
|
174 |
want_unchanged: If True, include TreeChanges for unmodified entries
|
170 | 175 |
as well.
|
171 | |
:param include_trees: Whether to include trees
|
172 | |
:param rename_detector: RenameDetector object for detecting renames.
|
173 | |
:param change_type_same: Whether to report change types in the same
|
|
176 |
include_trees: Whether to include trees
|
|
177 |
rename_detector: RenameDetector object for detecting renames.
|
|
178 |
change_type_same: Whether to report change types in the same
|
174 | 179 |
entry or as delete+add.
|
175 | |
:return: Iterator over TreeChange instances for each change between the
|
|
180 |
Returns:
|
|
181 |
Iterator over TreeChange instances for each change between the
|
176 | 182 |
source and target tree.
|
177 | 183 |
"""
|
178 | 184 |
if include_trees and rename_detector is not None:
|
|
231 | 237 |
rename_detector=None):
|
232 | 238 |
"""Get the tree changes for a merge tree relative to all its parents.
|
233 | 239 |
|
234 | |
:param store: An ObjectStore for looking up objects.
|
235 | |
:param parent_tree_ids: An iterable of the SHAs of the parent trees.
|
236 | |
:param tree_id: The SHA of the merge tree.
|
237 | |
:param rename_detector: RenameDetector object for detecting renames.
|
238 | |
|
239 | |
:return: Iterator over lists of TreeChange objects, one per conflicted path
|
240 | |
in the merge.
|
241 | |
|
242 | |
Each list contains one element per parent, with the TreeChange for that
|
243 | |
path relative to that parent. An element may be None if it never
|
244 | |
existed in one parent and was deleted in two others.
|
245 | |
|
246 | |
A path is only included in the output if it is a conflict, i.e. its SHA
|
247 | |
in the merge tree is not found in any of the parents, or in the case of
|
248 | |
deletes, if not all of the old SHAs match.
|
|
240 |
Args:
|
|
241 |
store: An ObjectStore for looking up objects.
|
|
242 |
parent_tree_ids: An iterable of the SHAs of the parent trees.
|
|
243 |
tree_id: The SHA of the merge tree.
|
|
244 |
rename_detector: RenameDetector object for detecting renames.
|
|
245 |
|
|
246 |
Returns:
|
|
247 |
Iterator over lists of TreeChange objects, one per conflicted path
|
|
248 |
in the merge.
|
|
249 |
|
|
250 |
Each list contains one element per parent, with the TreeChange for that
|
|
251 |
path relative to that parent. An element may be None if it never
|
|
252 |
existed in one parent and was deleted in two others.
|
|
253 |
|
|
254 |
A path is only included in the output if it is a conflict, i.e. its SHA
|
|
255 |
in the merge tree is not found in any of the parents, or in the case of
|
|
256 |
deletes, if not all of the old SHAs match.
|
249 | 257 |
"""
|
250 | 258 |
all_parent_changes = [tree_changes(store, t, tree_id,
|
251 | 259 |
rename_detector=rename_detector)
|
|
292 | 300 |
|
293 | 301 |
Splits the data into blocks either on lines or <=64-byte chunks of lines.
|
294 | 302 |
|
295 | |
:param obj: The object to count blocks for.
|
296 | |
:return: A dict of block hashcode -> total bytes occurring.
|
|
303 |
Args:
|
|
304 |
obj: The object to count blocks for.
|
|
305 |
Returns:
|
|
306 |
A dict of block hashcode -> total bytes occurring.
|
297 | 307 |
"""
|
298 | 308 |
block_counts = defaultdict(int)
|
299 | 309 |
block = BytesIO()
|
|
325 | 335 |
def _common_bytes(blocks1, blocks2):
|
326 | 336 |
"""Count the number of common bytes in two block count dicts.
|
327 | 337 |
|
328 | |
:param block1: The first dict of block hashcode -> total bytes.
|
329 | |
:param block2: The second dict of block hashcode -> total bytes.
|
330 | |
:return: The number of bytes in common between blocks1 and blocks2. This is
|
331 | |
only approximate due to possible hash collisions.
|
|
338 |
Args:
|
|
339 |
blocks1: The first dict of block hashcode -> total bytes.
|
|
340 |
blocks2: The second dict of block hashcode -> total bytes.
|
|
341 |
Returns:
|
|
342 |
The number of bytes in common between blocks1 and blocks2. This is
|
|
343 |
only approximate due to possible hash collisions.
|
332 | 344 |
"""
|
333 | 345 |
# Iterate over the smaller of the two dicts, since this is symmetrical.
|
334 | 346 |
if len(blocks1) > len(blocks2):
|
|
344 | 356 |
def _similarity_score(obj1, obj2, block_cache=None):
|
345 | 357 |
"""Compute a similarity score for two objects.
|
346 | 358 |
|
347 | |
:param obj1: The first object to score.
|
348 | |
:param obj2: The second object to score.
|
349 | |
:param block_cache: An optional dict of SHA to block counts to cache
|
|
359 |
Args:
|
|
360 |
obj1: The first object to score.
|
|
361 |
obj2: The second object to score.
|
|
362 |
block_cache: An optional dict of SHA to block counts to cache
|
350 | 363 |
results between calls.
|
351 | |
:return: The similarity score between the two objects, defined as the
|
|
364 |
Returns:
|
|
365 |
The similarity score between the two objects, defined as the
|
352 | 366 |
number of bytes in common between the two objects divided by the
|
353 | 367 |
maximum size, scaled to the range 0-100.
|
354 | 368 |
"""
|
|
386 | 400 |
find_copies_harder=False):
|
387 | 401 |
"""Initialize the rename detector.
|
388 | 402 |
|
389 | |
:param store: An ObjectStore for looking up objects.
|
390 | |
:param rename_threshold: The threshold similarity score for considering
|
|
403 |
Args:
|
|
404 |
store: An ObjectStore for looking up objects.
|
|
405 |
rename_threshold: The threshold similarity score for considering
|
391 | 406 |
an add/delete pair to be a rename/copy; see _similarity_score.
|
392 | |
:param max_files: The maximum number of adds and deletes to consider,
|
|
407 |
max_files: The maximum number of adds and deletes to consider,
|
393 | 408 |
or None for no limit. The detector is guaranteed to compare no more
|
394 | 409 |
than max_files ** 2 add/delete pairs. This limit is provided
|
395 | 410 |
because rename detection can be quadratic in the project size. If
|
396 | 411 |
the limit is exceeded, no content rename detection is attempted.
|
397 | |
:param rewrite_threshold: The threshold similarity score below which a
|
|
412 |
rewrite_threshold: The threshold similarity score below which a
|
398 | 413 |
modify should be considered a delete/add, or None to not break
|
399 | 414 |
modifies; see _similarity_score.
|
400 | |
:param find_copies_harder: If True, consider unmodified files when
|
|
415 |
find_copies_harder: If True, consider unmodified files when
|
401 | 416 |
detecting copies.
|
402 | 417 |
"""
|
403 | 418 |
self._store = store
|