Dulwich.io dulwich / d6d3c08
Convert more docstrings to Google style. Jelmer Vernooij 8 days ago
8 changed file(s) with 551 addition(s) and 405 deletion(s).
171171 def write_index(f, entries):
172172 """Write an index file.
173173
174 :param f: File-like object to write to
175 :param entries: Iterable over the entries to write
174 Args:
175 f: File-like object to write to
176 entries: Iterable over the entries to write
176177 """
177178 f.write(b'DIRC')
178179 f.write(struct.pack(b'>LL', 2, len(entries)))
195196
196197 This will return a mode that can be stored in a tree object.
197198
198 :param mode: Mode to clean up.
199 Args:
200 mode: Mode to clean up.
199201 """
200202 if stat.S_ISLNK(mode):
201203 return stat.S_IFLNK
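As a quick sketch of what cleanup_mode produces for regular files (assuming the usual normalization to 0o100644, with only the executable bits preserved — the diff above only shows the symlink branch):

from dulwich.index import cleanup_mode

# A group-writable regular file is normalized to a plain blob mode;
# an executable file keeps its executable bits.
assert cleanup_mode(0o100664) == 0o100644
assert cleanup_mode(0o100755) == 0o100755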
214216 def __init__(self, filename):
215217 """Open an index file.
216218
217 :param filename: Path to the index file
219 Args:
220 filename: Path to the index file
218221 """
219222 self._filename = filename
220223 self.clear()
258261 def __getitem__(self, name):
259262 """Retrieve entry by relative path.
260263
261 :return: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
264 Returns: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
262265 flags)
263266 """
264267 return self._byname[name]
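A hedged usage sketch for the Index class above; the repository path is hypothetical, and the tuple layout is the one documented in __getitem__:

from dulwich.index import Index

index = Index("/path/to/repo/.git/index")  # hypothetical path
for path in index:
    (ctime, mtime, dev, ino, mode, uid, gid,
     size, sha, flags) = index[path]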
313316 def changes_from_tree(self, object_store, tree, want_unchanged=False):
314317 """Find the differences between the contents of this index and a tree.
315318
316 :param object_store: Object store to use for retrieving tree contents
317 :param tree: SHA1 of the root tree
318 :param want_unchanged: Whether unchanged files should be reported
319 :return: Iterator over tuples with (oldpath, newpath), (oldmode,
319 Args:
320 object_store: Object store to use for retrieving tree contents
321 tree: SHA1 of the root tree
322 want_unchanged: Whether unchanged files should be reported
323 Returns: Iterator over tuples with (oldpath, newpath), (oldmode,
320324 newmode), (oldsha, newsha)
321325 """
322326 def lookup_entry(path):
330334 def commit(self, object_store):
331335 """Create a new tree from an index.
332336
333 :param object_store: Object store to save the tree in
334 :return: Root tree SHA
337 Args:
338 object_store: Object store to save the tree in
339 Returns:
340 Root tree SHA
335341 """
336342 return commit_tree(object_store, self.iterobjects())
337343
339345 def commit_tree(object_store, blobs):
340346 """Commit a new tree.
341347
342 :param object_store: Object store to add trees to
343 :param blobs: Iterable over blob path, sha, mode entries
344 :return: SHA1 of the created tree.
348 Args:
349 object_store: Object store to add trees to
350 blobs: Iterable over blob path, sha, mode entries
351 Returns:
352 SHA1 of the created tree.
345353 """
346354
347355 trees = {b'': {}}
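A minimal sketch of commit_tree against an in-memory store; the (path, sha, mode) tuple shape follows the docstring above:

from dulwich.index import commit_tree
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob

store = MemoryObjectStore()
blob = Blob.from_string(b"hello\n")
store.add_object(blob)
# blobs: iterable of (path, sha, mode) entries
tree_id = commit_tree(store, [(b"hello.txt", blob.id, 0o100644)])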
379387 def commit_index(object_store, index):
380388 """Create a new tree from an index.
381389
382 :param object_store: Object store to save the tree in
383 :param index: Index file
384 :note: This function is deprecated, use index.commit() instead.
385 :return: Root tree sha.
390 Args:
391 object_store: Object store to save the tree in
392 index: Index file
393 Note: This function is deprecated, use index.commit() instead.
394 Returns: Root tree sha.
386395 """
387396 return commit_tree(object_store, index.iterobjects())
388397
392401 """Find the differences between the contents of a tree and
393402 a working copy.
394403
395 :param names: Iterable of names in the working copy
396 :param lookup_entry: Function to lookup an entry in the working copy
397 :param object_store: Object store to use for retrieving tree contents
398 :param tree: SHA1 of the root tree, or None for an empty tree
399 :param want_unchanged: Whether unchanged files should be reported
400 :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
404 Args:
405 names: Iterable of names in the working copy
406 lookup_entry: Function to lookup an entry in the working copy
407 object_store: Object store to use for retrieving tree contents
408 tree: SHA1 of the root tree, or None for an empty tree
409 want_unchanged: Whether unchanged files should be reported
410 Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
401411 (oldsha, newsha)
402412 """
403413 # TODO(jelmer): Support a include_trees option
428438 def index_entry_from_stat(stat_val, hex_sha, flags, mode=None):
429439 """Create a new index entry from a stat value.
430440
431 :param stat_val: POSIX stat_result instance
432 :param hex_sha: Hex sha of the object
433 :param flags: Index flags
441 Args:
442 stat_val: POSIX stat_result instance
443 hex_sha: Hex sha of the object
444 flags: Index flags
434445 """
435446 if mode is None:
436447 mode = cleanup_mode(stat_val.st_mode)
444455 def build_file_from_blob(blob, mode, target_path, honor_filemode=True):
445456 """Build a file or symlink on disk based on a Git object.
446457
447 :param obj: The git object
448 :param mode: File mode
449 :param target_path: Path to write to
450 :param honor_filemode: An optional flag to honor core.filemode setting in
458 Args:
459 blob: The git object
460 mode: File mode
461 target_path: Path to write to
462 honor_filemode: An optional flag to honor core.filemode setting in
451463 config file, default is core.filemode=True, change executable bit
452 :return: stat object for the file
464 Returns: stat object for the file
453465 """
454466 try:
455467 oldstat = os.lstat(target_path)
517529 validate_path_element=validate_path_element_default):
518530 """Generate and materialize index from a tree
519531
520 :param tree_id: Tree to materialize
521 :param root_path: Target dir for materialized index files
522 :param index_path: Target path for generated index
523 :param object_store: Non-empty object store holding tree contents
524 :param honor_filemode: An optional flag to honor core.filemode setting in
532 Args:
533 tree_id: Tree to materialize
534 root_path: Target dir for materialized index files
535 index_path: Target path for generated index
536 object_store: Non-empty object store holding tree contents
537 honor_filemode: An optional flag to honor core.filemode setting in
525538 config file, default is core.filemode=True, change executable bit
526 :param validate_path_element: Function to validate path elements to check
539 validate_path_element: Function to validate path elements to check
527540 out; default just refuses .git and .. directories.
528541
529 :note:: existing index is wiped and contents are not merged
542 Note: existing index is wiped and contents are not merged
530543 in a working dir. Suitable only for fresh clones.
531544 """
532545
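A hedged checkout sketch for build_index_from_tree; the repository path is hypothetical, and the argument order (root_path, index_path, object_store, tree_id) is an assumption based on the function's signature rather than the docstring order:

from dulwich.repo import Repo
from dulwich.index import build_index_from_tree

repo = Repo("/path/to/repo")  # hypothetical path
tree_id = repo[b"HEAD"].tree
# Wipes and regenerates the index; per the note above, suitable
# only for fresh clones.
build_index_from_tree(repo.path, repo.index_path(),
                      repo.object_store, tree_id)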
569582 def blob_from_path_and_stat(fs_path, st):
570583 """Create a blob from a path and a stat object.
571584
572 :param fs_path: Full file system path to file
573 :param st: A stat object
574 :return: A `Blob` object
585 Args:
586 fs_path: Full file system path to file
587 st: A stat object
588 Returns: A `Blob` object
575589 """
576590 assert isinstance(fs_path, bytes)
577591 blob = Blob()
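A small sketch of blob_from_path_and_stat; as the assert above shows, the path must be bytes, and the file name here is hypothetical:

import os
from dulwich.index import blob_from_path_and_stat

fs_path = b"README.md"  # hypothetical path; must be bytes
blob = blob_from_path_and_stat(fs_path, os.lstat(fs_path))
print(blob.id)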
593607 def read_submodule_head(path):
594608 """Read the head commit of a submodule.
595609
596 :param path: path to the submodule
597 :return: HEAD sha, None if not a valid head/repository
610 Args:
611 path: path to the submodule
612 Returns: HEAD sha, None if not a valid head/repository
598613 """
599614 from dulwich.errors import NotGitRepository
600615 from dulwich.repo import Repo
640655 def get_unstaged_changes(index, root_path, filter_blob_callback=None):
641656 """Walk through an index and check for differences against working tree.
642657
643 :param index: index to check
644 :param root_path: path in which to find files
645 :return: iterator over paths with unstaged changes
658 Args:
659 index: index to check
660 root_path: path in which to find files
661 Returns: iterator over paths with unstaged changes
646662 """
647663 # For each entry in the index check the sha1 & ensure not staged
648664 if not isinstance(root_path, bytes):
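A hedged sketch of scanning for unstaged changes; the paths are hypothetical, and as the body above shows, a str root_path is converted to bytes internally:

from dulwich.index import Index, get_unstaged_changes

index = Index("/path/to/repo/.git/index")  # hypothetical path
for path in get_unstaged_changes(index, "/path/to/repo"):
    print(path)  # tree path of a file that differs from the index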
679695 def _tree_to_fs_path(root_path, tree_path):
680696 """Convert a git tree path to a file system path.
681697
682 :param root_path: Root filesystem path
683 :param tree_path: Git tree path as bytes
684
685 :return: File system path.
698 Args:
699 root_path: Root filesystem path
700 tree_path: Git tree path as bytes
701
702 Returns: File system path.
686703 """
687704 assert isinstance(tree_path, bytes)
688705 if os_sep_bytes != b'/':
695712 def _fs_to_tree_path(fs_path, fs_encoding=None):
696713 """Convert a file system path to a git tree path.
697714
698 :param fs_path: File system path.
699 :param fs_encoding: File system encoding
700
701 :return: Git tree path as bytes
715 Args:
716 fs_path: File system path.
717 fs_encoding: File system encoding
718
719 Returns: Git tree path as bytes
702720 """
703721 if fs_encoding is None:
704722 fs_encoding = sys.getfilesystemencoding()
720738 and tree references. For directories and
721739 non-existent files it returns None
722740
723 :param path: Path to create an index entry for
724 :param object_store: Optional object store to
741 Args:
742 path: Path to create an index entry for
743 object_store: Optional object store to
725744 save new blobs in
726 :return: An index entry; None for directories
745 Returns: An index entry; None for directories
727746 """
728747 assert isinstance(path, bytes)
729748 st = os.lstat(path)
745764 def iter_fresh_entries(paths, root_path, object_store=None):
746765 """Iterate over current versions of index entries on disk.
747766
748 :param paths: Paths to iterate over
749 :param root_path: Root path to access from
750 :param store: Optional store to save new blobs in
751 :return: Iterator over path, index_entry
767 Args:
768 paths: Paths to iterate over
769 root_path: Root path to access from
770 object_store: Optional object store to save new blobs in
771 Returns: Iterator over path, index_entry
752772 """
753773 for path in paths:
754774 p = _tree_to_fs_path(root_path, path)
767787
768788 Don't use this function; it removes missing entries from index.
769789
770 :param index: Index file
771 :param root_path: Root path to access from
772 :param include_deleted: Include deleted entries with sha and
790 Args:
791 index: Index file
792 root_path: Root path to access from
793 include_deleted: Include deleted entries with sha and
773794 mode set to None
774 :return: Iterator over path, sha, mode
795 Returns: Iterator over path, sha, mode
775796 """
776797 import warnings
777798 warnings.warn(PendingDeprecationWarning,
788809 object_store=None):
789810 """Iterate over versions of objecs on disk referenced by index.
790811
791 :param index: Index file
792 :param root_path: Root path to access from
793 :param include_deleted: Include deleted entries with sha and
812 Args:
813 index: Index file
814 root_path: Root path to access from
815 include_deleted: Include deleted entries with sha and
794816 mode set to None
795 :param object_store: Optional object store to report new items to
796 :return: Iterator over path, sha, mode
817 object_store: Optional object store to report new items to
818 Returns: Iterator over path, sha, mode
797819 """
798820 for path, entry in iter_fresh_entries(paths, root_path,
799821 object_store=object_store):
810832
811833 This is the equivalent to running 'git commit -a'.
812834
813 :param index: Index to update
814 :param root_path: Root filesystem path
835 Args:
836 index: Index to update
837 root_path: Root filesystem path
815838 """
816839 for path, entry in iter_fresh_entries(index, root_path):
817840 index[path] = entry
135135 def convert_crlf_to_lf(text_hunk):
136136 """Convert CRLF in text hunk into LF
137137
138 :param text_hunk: A bytes string representing a text hunk
139 :return: The text hunk with the same type, with CRLF replaced into LF
138 Args:
139 text_hunk: A bytes string representing a text hunk
140 Returns: The text hunk with the same type, with CRLF replaced into LF
140141 """
141142 return text_hunk.replace(CRLF, LF)
142143
144145 def convert_lf_to_crlf(text_hunk):
145146 """Convert LF in text hunk into CRLF
146147
147 :param text_hunk: A bytes string representing a text hunk
148 :return: The text hunk with the same type, with LF replaced into CRLF
148 Args:
149 text_hunk: A bytes string representing a text hunk
150 Returns: The text hunk with the same type, with LF replaced into CRLF
149151 """
150152 # TODO find a more efficient way of doing it
151153 intermediary = text_hunk.replace(CRLF, LF)
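A quick sketch of the two conversions, assuming they live in dulwich.line_ending as in current dulwich:

from dulwich.line_ending import convert_crlf_to_lf, convert_lf_to_crlf

assert convert_crlf_to_lf(b"a\r\nb\r\n") == b"a\nb\n"
# Normalizes to LF first, so pre-existing lone LFs also come out as CRLF.
assert convert_lf_to_crlf(b"a\nb\n") == b"a\r\nb\r\n"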
173175 def get_checkout_filter_autocrlf(core_autocrlf):
174176 """ Returns the correct checkout filter base on autocrlf value
175177
176 :param core_autocrlf: The bytes configuration value of core.autocrlf.
178 Args:
179 core_autocrlf: The bytes configuration value of core.autocrlf.
177180 Valid values are: b'true', b'false' or b'input'.
178 :return: Either None if no filter has to be applied or a function
181 Returns: Either None if no filter has to be applied or a function
179182 accepting a single argument, a binary text hunk
180183 """
181184
188191 def get_checkin_filter_autocrlf(core_autocrlf):
189192 """ Returns the correct checkin filter base on autocrlf value
190193
191 :param core_autocrlf: The bytes configuration value of core.autocrlf.
194 Args:
195 core_autocrlf: The bytes configuration value of core.autocrlf.
192196 Valid values are: b'true', b'false' or b'input'.
193 :return: Either None if no filter has to be applied or a function
197 Returns: Either None if no filter has to be applied or a function
194198 accepting a single argument, a binary text hunk
195199 """
196200
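A hedged sketch of selecting a filter from a core.autocrlf value (module location assumed as above):

from dulwich.line_ending import get_checkout_filter_autocrlf

checkout_filter = get_checkout_filter_autocrlf(b"true")
if checkout_filter is not None:  # None means no filtering is needed
    assert checkout_filter(b"a\nb\n") == b"a\r\nb\r\n"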
139139 Also, if the entry is ever removed from the cache, call
140140 cleanup(key, value).
141141
142 :param key: The key to store it under
143 :param value: The object to store
144 :param cleanup: None or a function taking (key, value) to indicate
142 Args:
143 key: The key to store it under
144 value: The object to store
145 cleanup: None or a function taking (key, value) to indicate
145146 'value' should be cleaned up.
146147 """
147148 if key is _null_key:
178179 request them later. This is simply meant as a peek into the current
179180 state.
180181
181 :return: An unordered list of keys that are currently cached.
182 Returns: An unordered list of keys that are currently cached.
182183 """
183184 return self._cache.keys()
184185
287288 compute_size=None):
288289 """Create a new LRUSizeCache.
289290
290 :param max_size: The max number of bytes to store before we start
291 Args:
292 max_size: The max number of bytes to store before we start
291293 clearing out entries.
292 :param after_cleanup_size: After cleaning up, shrink everything to this
294 after_cleanup_size: After cleaning up, shrink everything to this
293295 size.
294 :param compute_size: A function to compute the size of the values. We
296 compute_size: A function to compute the size of the values. We
295297 use a function here, so that you can pass 'len' if you are just
296298 using simple strings, or a more complex function if you are using
297299 something like a list of strings, or even a custom object.
311313 Also, if the entry is ever removed from the cache, call
312314 cleanup(key, value).
313315
314 :param key: The key to store it under
315 :param value: The object to store
316 :param cleanup: None or a function taking (key, value) to indicate
316 Args:
317 key: The key to store it under
318 value: The object to store
319 cleanup: None or a function taking (key, value) to indicate
317320 'value' should be cleaned up.
318321 """
319322 if key is _null_key:
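A minimal LRUSizeCache sketch using only the constructor and add() shown above; the key, value and sizes are illustrative:

from dulwich.lru_cache import LRUSizeCache

cache = LRUSizeCache(max_size=1024, after_cleanup_size=512,
                     compute_size=len)
cache.add(b"some-key", b"x" * 100)  # hypothetical key/value
print(cache.keys())  # unordered keys currently cached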
3636 def read_mailmap(f):
3737 """Read a mailmap.
3838
39 :param f: File-like object to read from
40 :return: Iterator over
39 Args:
40 f: File-like object to read from
41 Returns: Iterator over
4142 ((canonical_name, canonical_email), (from_name, from_email)) tuples
4243 """
4344 for line in f:
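A small sketch of read_mailmap on an in-memory file; the identity is made up, and fields absent from a line come back as None:

from io import BytesIO
from dulwich.mailmap import read_mailmap

f = BytesIO(b"Jane Doe <jane@example.com> <jane@old.example.com>\n")
for canonical_identity, from_identity in read_mailmap(f):
    print(canonical_identity, from_identity)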
7172 Any of the fields can be None, but at least one of them needs to be
7273 set.
7374
74 :param canonical_identity: The canonical identity (tuple)
75 :param from_identity: The from identity (tuple)
75 Args:
76 canonical_identity: The canonical identity (tuple)
77 from_identity: The from identity (tuple)
7678 """
7779 if from_identity is None:
7880 from_name, from_email = None, None
8181 def iter_shas(self, shas):
8282 """Iterate over the objects for the specified shas.
8383
84 :param shas: Iterable object with SHAs
85 :return: Object iterator
84 Args:
85 shas: Iterable object with SHAs
86 Returns: Object iterator
8687 """
8788 return ObjectStoreIterator(self, shas)
8889
109110 def get_raw(self, name):
110111 """Obtain the raw text for an object.
111112
112 :param name: sha for the object.
113 :return: tuple with numeric type and object contents.
113 Args:
114 name: sha for the object.
115 Returns: tuple with numeric type and object contents.
114116 """
115117 raise NotImplementedError(self.get_raw)
116118
132134 def add_objects(self, objects, progress=None):
133135 """Add a set of objects to this object store.
134136
135 :param objects: Iterable over a list of (object, path) tuples
137 Args:
138 objects: Iterable over a list of (object, path) tuples
136139 """
137140 raise NotImplementedError(self.add_objects)
138141
139142 def add_pack_data(self, count, pack_data, progress=None):
140143 """Add pack data to this object store.
141144
142 :param num_items: Number of items to add
143 :param pack_data: Iterator over pack data tuples
145 Args:
146 count: Number of items to add
147 pack_data: Iterator over pack data tuples
144148 """
145149 if count == 0:
146150 # Don't bother writing an empty pack file
158162 include_trees=False, change_type_same=False):
159163 """Find the differences between the contents of two trees
160164
161 :param source: SHA1 of the source tree
162 :param target: SHA1 of the target tree
163 :param want_unchanged: Whether unchanged files should be reported
164 :param include_trees: Whether to include trees
165 :param change_type_same: Whether to report files changing
165 Args:
166 source: SHA1 of the source tree
167 target: SHA1 of the target tree
168 want_unchanged: Whether unchanged files should be reported
169 include_trees: Whether to include trees
170 change_type_same: Whether to report files changing
166171 type in the same entry.
167 :return: Iterator over tuples with
172 Returns: Iterator over tuples with
168173 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
169174 """
170175 for change in tree_changes(self, source, target,
180185
181186 Iteration is depth-first pre-order, as in e.g. os.walk.
182187
183 :param tree_id: SHA1 of the tree.
184 :param include_trees: If True, include tree objects in the iteration.
185 :return: Iterator over TreeEntry namedtuples for all the objects in a
188 Args:
189 tree_id: SHA1 of the tree.
190 include_trees: If True, include tree objects in the iteration.
191 Returns: Iterator over TreeEntry namedtuples for all the objects in a
186192 tree.
187193 """
188194 for entry, _ in walk_trees(self, tree_id, None):
196202 depth=None):
197203 """Find the missing objects required for a set of revisions.
198204
199 :param haves: Iterable over SHAs already in common.
200 :param wants: Iterable over SHAs of objects to fetch.
201 :param progress: Simple progress function that will be called with
205 Args:
206 haves: Iterable over SHAs already in common.
207 wants: Iterable over SHAs of objects to fetch.
208 progress: Simple progress function that will be called with
202209 updated progress strings.
203 :param get_tagged: Function that returns a dict of pointed-to sha ->
210 get_tagged: Function that returns a dict of pointed-to sha ->
204211 tag sha for including tags.
205 :param get_parents: Optional function for getting the parents of a
212 get_parents: Optional function for getting the parents of a
206213 commit.
207 :return: Iterator over (sha, path) pairs.
214 Returns: Iterator over (sha, path) pairs.
208215 """
209216 finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
210217 get_parents=get_parents)
213220 def find_common_revisions(self, graphwalker):
214221 """Find which revisions this store has in common using graphwalker.
215222
216 :param graphwalker: A graphwalker object.
217 :return: List of SHAs that are in common
223 Args:
224 graphwalker: A graphwalker object.
225 Returns: List of SHAs that are in common
218226 """
219227 haves = []
220228 sha = next(graphwalker)
228236 def generate_pack_contents(self, have, want, progress=None):
229237 """Iterate over the contents of a pack file.
230238
231 :param have: List of SHA1s of objects that should not be sent
232 :param want: List of SHA1s of objects that should be sent
233 :param progress: Optional progress reporting method
239 Args:
240 have: List of SHA1s of objects that should not be sent
241 want: List of SHA1s of objects that should be sent
242 progress: Optional progress reporting method
234243 """
235244 return self.iter_shas(self.find_missing_objects(have, want, progress))
236245
237246 def generate_pack_data(self, have, want, progress=None, ofs_delta=True):
238247 """Generate pack data objects for a set of wants/haves.
239248
240 :param have: List of SHA1s of objects that should not be sent
241 :param want: List of SHA1s of objects that should be sent
242 :param ofs_delta: Whether OFS deltas can be included
243 :param progress: Optional progress reporting method
249 Args:
250 have: List of SHA1s of objects that should not be sent
251 want: List of SHA1s of objects that should be sent
252 ofs_delta: Whether OFS deltas can be included
253 progress: Optional progress reporting method
244254 """
245255 # TODO(jelmer): More efficient implementation
246256 return pack_objects_to_data(
249259 def peel_sha(self, sha):
250260 """Peel all tags from a SHA.
251261
252 :param sha: The object SHA to peel.
253 :return: The fully-peeled SHA1 of a tag object, after peeling all
262 Args:
263 sha: The object SHA to peel.
264 Returns: The fully-peeled SHA1 of a tag object, after peeling all
254265 intermediate tags; if the original ref does not point to a tag,
255266 this will equal the original SHA1.
256267 """
265276 get_parents=lambda commit: commit.parents):
266277 """Collect all ancestors of heads up to (excluding) those in common.
267278
268 :param heads: commits to start from
269 :param common: commits to end at, or empty set to walk repository
279 Args:
280 heads: commits to start from
281 common: commits to end at, or empty set to walk repository
270282 completely
271 :param get_parents: Optional function for getting the parents of a
283 get_parents: Optional function for getting the parents of a
272284 commit.
273 :return: a tuple (A, B) where A - all commits reachable
285 Returns: a tuple (A, B) where A - all commits reachable
274286 from heads but not present in common, B - common (shared) elements
275287 that are directly reachable from heads
276288 """
381393 def pack_loose_objects(self):
382394 """Pack loose objects.
383395
384 :return: Number of objects packed
396 Returns: Number of objects packed
385397 """
386398 objects = set()
387399 for sha in self._iter_loose_objects():
443455 def get_raw(self, name):
444456 """Obtain the raw fulltext for an object.
445457
446 :param name: sha for the object.
447 :return: tuple with numeric type and object contents.
458 Args:
459 name: sha for the object.
460 Returns: tuple with numeric type and object contents.
448461 """
449462 if name == ZERO_SHA:
450463 raise KeyError(name)
483496 def add_objects(self, objects, progress=None):
484497 """Add a set of objects to this object store.
485498
486 :param objects: Iterable over (object, path) tuples, should support
499 Args:
500 objects: Iterable over (object, path) tuples, should support
487501 __len__.
488 :return: Pack object of the objects written.
502 Returns: Pack object of the objects written.
489503 """
490504 return self.add_pack_data(
491505 *pack_objects_to_data(objects),
498512 def __init__(self, path):
499513 """Open an object store.
500514
501 :param path: Path of the object store.
515 Args:
516 path: Path of the object store.
502517 """
503518 super(DiskObjectStore, self).__init__()
504519 self.path = path
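A hedged sketch of a disk-backed store; DiskObjectStore.init is assumed to create the directory layout, and the path is hypothetical:

from dulwich.object_store import DiskObjectStore
from dulwich.objects import Blob

store = DiskObjectStore.init("/tmp/objects")  # hypothetical path
blob = Blob.from_string(b"example\n")
store.add_object(blob)  # written as a loose object
assert blob.id in store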
631646 def _complete_thin_pack(self, f, path, copier, indexer):
632647 """Move a specific file containing a pack into the pack directory.
633648
634 :note: The file should be on the same file system as the
649 Note: The file should be on the same file system as the
635650 packs directory.
636651
637 :param f: Open file object for the pack.
638 :param path: Path to the pack file.
639 :param copier: A PackStreamCopier to use for writing pack data.
640 :param indexer: A PackIndexer for indexing the pack.
652 Args:
653 f: Open file object for the pack.
654 path: Path to the pack file.
655 copier: A PackStreamCopier to use for writing pack data.
656 indexer: A PackIndexer for indexing the pack.
641657 """
642658 entries = list(indexer)
643659
700716 outside the pack. They should never be placed in the object store
701717 directly, and always indexed and completed as they are copied.
702718
703 :param read_all: Read function that blocks until the number of
719 Args:
720 read_all: Read function that blocks until the number of
704721 requested bytes are read.
705 :param read_some: Read function that returns at least one byte, but may
722 read_some: Read function that returns at least one byte, but may
706723 not return the number of bytes requested.
707 :return: A Pack object pointing at the now-completed thin pack in the
724 Returns: A Pack object pointing at the now-completed thin pack in the
708725 objects/pack directory.
709726 """
710727 fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
718735 def move_in_pack(self, path):
719736 """Move a specific file containing a pack into the pack directory.
720737
721 :note: The file should be on the same file system as the
738 Note: The file should be on the same file system as the
722739 packs directory.
723740
724 :param path: Path to the pack file.
741 Args:
742 path: Path to the pack file.
725743 """
726744 with PackData(path) as p:
727745 entries = p.sorted_entries()
750768 def add_pack(self):
751769 """Add a new pack to this object store.
752770
753 :return: Fileobject to write to, a commit function to
771 Returns: Fileobject to write to, a commit function to
754772 call when the pack is finished and an abort
755773 function.
756774 """
775793 def add_object(self, obj):
776794 """Add a single object to this object store.
777795
778 :param obj: Object to add
796 Args:
797 obj: Object to add
779798 """
780799 path = self._get_shafile_path(obj.id)
781800 dir = os.path.dirname(path)
836855 def get_raw(self, name):
837856 """Obtain the raw text for an object.
838857
839 :param name: sha for the object.
840 :return: tuple with numeric type and object contents.
858 Args:
859 name: sha for the object.
860 Returns: tuple with numeric type and object contents.
841861 """
842862 obj = self[self._to_hexsha(name)]
843863 return obj.type_num, obj.as_raw_string()
858878 def add_objects(self, objects, progress=None):
859879 """Add a set of objects to this object store.
860880
861 :param objects: Iterable over a list of (object, path) tuples
881 Args:
882 objects: Iterable over a list of (object, path) tuples
862883 """
863884 for obj, path in objects:
864885 self.add_object(obj)
869890 Because this object store doesn't support packs, we extract and add the
870891 individual objects.
871892
872 :return: Fileobject to write to and a commit function to
893 Returns: Fileobject to write to and a commit function to
873894 call when the pack is finished.
874895 """
875896 f = BytesIO()
887908 def _complete_thin_pack(self, f, indexer):
888909 """Complete a thin pack by adding external references.
889910
890 :param f: Open file object for the pack.
891 :param indexer: A PackIndexer for indexing the pack.
911 Args:
912 f: Open file object for the pack.
913 indexer: A PackIndexer for indexing the pack.
892914 """
893915 entries = list(indexer)
894916
914936 outside the pack. Because this object store doesn't support packs, we
915937 extract and add the individual objects.
916938
917 :param read_all: Read function that blocks until the number of
939 Args:
940 read_all: Read function that blocks until the number of
918941 requested bytes are read.
919 :param read_some: Read function that returns at least one byte, but may
942 read_some: Read function that returns at least one byte, but may
920943 not return the number of bytes requested.
921944 """
922945 f, commit, abort = self.add_pack()
946969 def __init__(self, store, sha_iter):
947970 """Create a new ObjectIterator.
948971
949 :param store: Object store to retrieve from
950 :param sha_iter: Iterator over (sha, path) tuples
972 Args:
973 store: Object store to retrieve from
974 sha_iter: Iterator over (sha, path) tuples
951975 """
952976 self.store = store
953977 self.sha_iter = sha_iter
974998 def __contains__(self, needle):
975999 """Check if an object is present.
9761000
977 :note: This checks if the object is present in
1001 Note: This checks if the object is present in
9781002 the underlying object store, not if it would
9791003 be yielded by the iterator.
9801004
981 :param needle: SHA1 of the object to check for
1005 Args:
1006 needle: SHA1 of the object to check for
9821007 """
9831008 if needle == ZERO_SHA:
9841009 return False
9871012 def __getitem__(self, key):
9881013 """Find an object by SHA1.
9891014
990 :note: This retrieves the object from the underlying
1015 Note: This retrieves the object from the underlying
9911016 object store. It will also succeed if the object would
9921017 not be returned by the iterator.
9931018 """
10191044 def tree_lookup_path(lookup_obj, root_sha, path):
10201045 """Look up an object in a Git tree.
10211046
1022 :param lookup_obj: Callback for retrieving object by SHA1
1023 :param root_sha: SHA1 of the root tree
1024 :param path: Path to lookup
1025 :return: A tuple of (mode, SHA) of the resulting path.
1047 Args:
1048 lookup_obj: Callback for retrieving object by SHA1
1049 root_sha: SHA1 of the root tree
1050 path: Path to lookup
1051 Returns: A tuple of (mode, SHA) of the resulting path.
10261052 """
10271053 tree = lookup_obj(root_sha)
10281054 if not isinstance(tree, Tree):
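A self-contained sketch of tree_lookup_path; any callable mapping a SHA to an object works as lookup_obj, so the store's __getitem__ is used here:

from dulwich.object_store import MemoryObjectStore, tree_lookup_path
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"data\n")
tree = Tree()
tree.add(b"file.txt", 0o100644, blob.id)
store.add_objects([(blob, None), (tree, None)])
mode, sha = tree_lookup_path(store.__getitem__, tree.id, b"file.txt")
assert sha == blob.id and mode == 0o100644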
10331059 def _collect_filetree_revs(obj_store, tree_sha, kset):
10341060 """Collect SHA1s of files and directories for specified tree.
10351061
1036 :param obj_store: Object store to get objects by SHA from
1037 :param tree_sha: tree reference to walk
1038 :param kset: set to fill with references to files and directories
1062 Args:
1063 obj_store: Object store to get objects by SHA from
1064 tree_sha: tree reference to walk
1065 kset: set to fill with references to files and directories
10391066 """
10401067 filetree = obj_store[tree_sha]
10411068 for name, mode, sha in filetree.iteritems():
10531080 through, and unless ignore_unknown argument is True, KeyError
10541081 is thrown for SHA1 missing in the repository
10551082
1056 :param obj_store: Object store to get objects by SHA1 from
1057 :param lst: Collection of commit and tag SHAs
1058 :param ignore_unknown: True to skip SHA1 missing in the repository
1083 Args:
1084 obj_store: Object store to get objects by SHA1 from
1085 lst: Collection of commit and tag SHAs
1086 ignore_unknown: True to skip SHA1 missing in the repository
10591087 silently.
1060 :return: A tuple of (commits, tags, others) SHA1s
1088 Returns: A tuple of (commits, tags, others) SHA1s
10611089 """
10621090 commits = set()
10631091 tags = set()
10871115 class MissingObjectFinder(object):
10881116 """Find the objects missing from another object store.
10891117
1090 :param object_store: Object store containing at least all objects to be
1118 Args:
1119 object_store: Object store containing at least all objects to be
10911120 sent
1092 :param haves: SHA1s of commits not to send (already present in target)
1093 :param wants: SHA1s of commits to send
1094 :param progress: Optional function to report progress to.
1095 :param get_tagged: Function that returns a dict of pointed-to sha -> tag
1121 haves: SHA1s of commits not to send (already present in target)
1122 wants: SHA1s of commits to send
1123 progress: Optional function to report progress to.
1124 get_tagged: Function that returns a dict of pointed-to sha -> tag
10961125 sha for including tags.
1097 :param get_parents: Optional function for getting the parents of a commit.
1098 :param tagged: dict of pointed-to sha -> tag sha for including tags
1126 get_parents: Optional function for getting the parents of a commit.
1127 tagged: dict of pointed-to sha -> tag sha for including tags
10991128 """
11001129
11011130 def __init__(self, object_store, haves, wants, progress=None,
11891218 def __init__(self, local_heads, get_parents, shallow=None):
11901219 """Create a new instance.
11911220
1192 :param local_heads: Heads to start search with
1193 :param get_parents: Function for finding the parents of a SHA1.
1221 Args:
1222 local_heads: Heads to start search with
1223 get_parents: Function for finding the parents of a SHA1.
11941224 """
11951225 self.heads = set(local_heads)
11961226 self.get_parents = get_parents
12531283 number of changes to a big tree. For a large number of changes
12541284 to a large tree, use e.g. commit_tree.
12551285
1256 :param object_store: Object store to store new objects in
1286 Args:
1287 object_store: Object store to store new objects in
12571288 and retrieve old ones from.
1258 :param tree: Original tree root
1259 :param changes: changes to apply
1260 :return: New tree root object
1289 tree: Original tree root
1290 changes: changes to apply
1291 Returns: New tree root object
12611292 """
12621293 # TODO(jelmer): Save up the objects and add them using .add_objects
12631294 # rather than with individual calls to .add_object.
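A hedged sketch of commit_tree_changes, assuming changes are (path, new_mode, new_sha) tuples applied against the original Tree object:

from dulwich.object_store import MemoryObjectStore, commit_tree_changes
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old_blob = Blob.from_string(b"old\n")
tree = Tree()
tree.add(b"a.txt", 0o100644, old_blob.id)
store.add_objects([(old_blob, None), (tree, None)])

new_blob = Blob.from_string(b"new\n")
store.add_object(new_blob)
new_tree = commit_tree_changes(
    store, tree, [(b"a.txt", 0o100644, new_blob.id)])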
7272 def S_ISGITLINK(m):
7373 """Check if a mode indicates a submodule.
7474
75 :param m: Mode to check
76 :return: a ``boolean``
75 Args:
76 m: Mode to check
77 Returns: a ``boolean``
7778 """
7879 return (stat.S_IFMT(m) == S_IFGITLINK)
7980
161162 def object_class(type):
162163 """Get the object class corresponding to the given type.
163164
164 :param type: Either a type name string or a numeric type.
165 :return: The ShaFile subclass corresponding to the given type, or None if
165 Args:
166 type: Either a type name string or a numeric type.
167 Returns: The ShaFile subclass corresponding to the given type, or None if
166168 type is not a valid type name/number.
167169 """
168170 return _TYPE_MAP.get(type, None)
171173 def check_hexsha(hex, error_msg):
172174 """Check if a string is a valid hex sha string.
173175
174 :param hex: Hex string to check
175 :param error_msg: Error message to use in exception
176 :raise ObjectFormatException: Raised when the string is not valid
176 Args:
177 hex: Hex string to check
178 error_msg: Error message to use in exception
179 Raises:
180 ObjectFormatException: Raised when the string is not valid
177181 """
178182 if not valid_hexsha(hex):
179183 raise ObjectFormatException("%s %s" % (error_msg, hex))
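For quick reference, a sketch exercising the three helpers above (0o160000 is the gitlink mode; the sha literal is illustrative):

from dulwich.objects import S_ISGITLINK, object_class, check_hexsha, Blob

assert S_ISGITLINK(0o160000)          # submodule (gitlink) entry
assert object_class(b"blob") is Blob  # lookup by type name
assert object_class(3) is Blob        # or by numeric type
check_hexsha(b"a" * 40, "invalid sha")  # passes silently when valid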
184188
185189 This will raise an exception if the identity is not valid.
186190
187 :param identity: Identity string
188 :param error_msg: Error message to use in exception
191 Args:
192 identity: Identity string
193 error_msg: Error message to use in exception
189194 """
190195 email_start = identity.find(b'<')
191196 email_end = identity.find(b'>')
201206
202207 This will raise an exception if the time is not valid.
203208
204 :param time_info: author/committer/tagger info
209 Args:
210 time_info: author/committer/tagger info
205211
206212 """
207213 # Prevent overflow error
278284 def as_legacy_object_chunks(self):
279285 """Return chunks representing the object in the experimental format.
280286
281 :return: List of strings
287 Returns: List of strings
282288 """
283289 compobj = zlib.compressobj()
284290 yield compobj.compress(self._header())
294300 def as_raw_chunks(self):
295301 """Return chunks with serialization of the object.
296302
297 :return: List of strings, not necessarily one per line
303 Returns: List of strings, not necessarily one per line
298304 """
299305 if self._needs_serialization:
300306 self._sha = None
305311 def as_raw_string(self):
306312 """Return raw string with serialization of the object.
307313
308 :return: String object
314 Returns: String object
309315 """
310316 return b''.join(self.as_raw_chunks())
311317
416422 def from_raw_string(type_num, string, sha=None):
417423 """Creates an object of the indicated type from the raw string given.
418424
419 :param type_num: The numeric type of the object.
420 :param string: The raw uncompressed contents.
421 :param sha: Optional known sha for the object
425 Args:
426 type_num: The numeric type of the object.
427 string: The raw uncompressed contents.
428 sha: Optional known sha for the object
422429 """
423430 obj = object_class(type_num)()
424431 obj.set_raw_string(string, sha)
428435 def from_raw_chunks(type_num, chunks, sha=None):
429436 """Creates an object of the indicated type from the raw chunks given.
430437
431 :param type_num: The numeric type of the object.
432 :param chunks: An iterable of the raw uncompressed contents.
433 :param sha: Optional known sha for the object
438 Args:
439 type_num: The numeric type of the object.
440 chunks: An iterable of the raw uncompressed contents.
441 sha: Optional known sha for the object
434442 """
435443 obj = object_class(type_num)()
436444 obj.set_raw_chunks(chunks, sha)
446454 def _check_has_member(self, member, error_msg):
447455 """Check that the object has a given member variable.
448456
449 :param member: the member variable to check for
450 :param error_msg: the message for an error if the member is missing
451 :raise ObjectFormatException: with the given error_msg if member is
457 Args:
458 member: the member variable to check for
459 error_msg: the message for an error if the member is missing
460 Raises:
461 ObjectFormatException: with the given error_msg if member is
452462 missing or is None
453463 """
454464 if getattr(self, member, None) is None:
457467 def check(self):
458468 """Check this object for internal consistency.
459469
460 :raise ObjectFormatException: if the object is malformed in some way
461 :raise ChecksumMismatch: if the object was created with a SHA that does
470 Raises:
471 ObjectFormatException: if the object is malformed in some way
472 ChecksumMismatch: if the object was created with a SHA that does
462473 not match its contents
463474 """
464475 # TODO: if we find that error-checking during object parsing is a
602613 def check(self):
603614 """Check this object for internal consistency.
604615
605 :raise ObjectFormatException: if the object is malformed in some way
616 Raises:
617 ObjectFormatException: if the object is malformed in some way
606618 """
607619 super(Blob, self).check()
608620
637649 def _parse_message(chunks):
638650 """Parse a message with a list of fields and a body.
639651
640 :param chunks: the raw chunks of the tag or commit object.
641 :return: iterator of tuples of (field, value), one per header line, in the
652 Args:
653 chunks: the raw chunks of the tag or commit object.
654 Returns: iterator of tuples of (field, value), one per header line, in the
642655 order read from the text, possibly including duplicates. Includes a
643656 field named None for the freeform tag/commit text.
644657 """
715728 def check(self):
716729 """Check this object for internal consistency.
717730
718 :raise ObjectFormatException: if the object is malformed in some way
731 Raises:
732 ObjectFormatException: if the object is malformed in some way
719733 """
720734 super(Tag, self).check()
721735 self._check_has_member("_object_sha", "missing object sha")
806820 def _get_object(self):
807821 """Get the object pointed to by this tag.
808822
809 :return: tuple of (object class, sha).
823 Returns: tuple of (object class, sha).
810824 """
811825 return (self._object_class, self._object_sha)
812826
847861 def parse_tree(text, strict=False):
848862 """Parse a tree text.
849863
850 :param text: Serialized text to parse
851 :return: iterator of tuples of (name, mode, sha)
852 :raise ObjectFormatException: if the object was malformed in some way
864 Args:
865 text: Serialized text to parse
866 Returns: iterator of tuples of (name, mode, sha)
867 Raises:
868 ObjectFormatException: if the object was malformed in some way
853869 """
854870 count = 0
855871 length = len(text)
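A minimal parse_tree sketch; a tree entry is an octal mode, a space, the name, a NUL byte, then a 20-byte binary sha (all zeros here for illustration):

from dulwich.objects import parse_tree

raw = b"100644 a.txt\x00" + bytes(20)
assert list(parse_tree(raw)) == [(b"a.txt", 0o100644, b"0" * 40)]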
875891 def serialize_tree(items):
876892 """Serialize the items in a tree to a text.
877893
878 :param items: Sorted iterable over (name, mode, sha) tuples
879 :return: Serialized tree text as chunks
894 Args:
895 items: Sorted iterable over (name, mode, sha) tuples
896 Returns: Serialized tree text as chunks
880897 """
881898 for name, mode, hexsha in items:
882899 yield (("%04o" % mode).encode('ascii') + b' ' + name +
886903 def sorted_tree_items(entries, name_order):
887904 """Iterate over a tree entries dictionary.
888905
889 :param name_order: If True, iterate entries in order of their name. If
906 Args:
907 name_order: If True, iterate entries in order of their name. If
890908 False, iterate entries in tree order, that is, treat subtree entries as
891909 having '/' appended.
892 :param entries: Dictionary mapping names to (mode, sha) tuples
893 :return: Iterator over (name, mode, hexsha)
910 entries: Dictionary mapping names to (mode, sha) tuples
911 Returns: Iterator over (name, mode, hexsha)
894912 """
895913 key_func = name_order and key_entry_name_order or key_entry
896914 for name, entry in sorted(entries.items(), key=key_func):
905923 def key_entry(entry):
906924 """Sort key for tree entry.
907925
908 :param entry: (name, value) tuplee
926 Args:
927 entry: (name, value) tuple
909928 """
910929 (name, value) = entry
911930 if stat.S_ISDIR(value[0]):
921940 def pretty_format_tree_entry(name, mode, hexsha, encoding="utf-8"):
922941 """Pretty format tree entry.
923942
924 :param name: Name of the directory entry
925 :param mode: Mode of entry
926 :param hexsha: Hexsha of the referenced object
927 :return: string describing the tree entry
943 Args:
944 name: Name of the directory entry
945 mode: Mode of entry
946 hexsha: Hexsha of the referenced object
947 Returns: string describing the tree entry
928948 """
929949 if mode & stat.S_IFDIR:
930950 kind = "tree"
963983 def __setitem__(self, name, value):
964984 """Set a tree entry by name.
965985
966 :param name: The name of the entry, as a string.
967 :param value: A tuple of (mode, hexsha), where mode is the mode of the
986 Args:
987 name: The name of the entry, as a string.
988 value: A tuple of (mode, hexsha), where mode is the mode of the
968989 entry as an integral type and hexsha is the hex SHA of the entry as
969990 a string.
970991 """
9851006 def add(self, name, mode, hexsha):
9861007 """Add an entry to the tree.
9871008
988 :param mode: The mode of the entry as an integral type. Not all
1009 Args:
1010 mode: The mode of the entry as an integral type. Not all
9891011 possible modes are supported by git; see check() for details.
990 :param name: The name of the entry, as a string.
991 :param hexsha: The hex SHA of the entry as a string.
1012 name: The name of the entry, as a string.
1013 hexsha: The hex SHA of the entry as a string.
9921014 """
9931015 if isinstance(name, int) and isinstance(mode, bytes):
9941016 (name, mode) = (mode, name)
10011023 def iteritems(self, name_order=False):
10021024 """Iterate over entries.
10031025
1004 :param name_order: If True, iterate in name order instead of tree
1026 Args:
1027 name_order: If True, iterate in name order instead of tree
10051028 order.
1006 :return: Iterator over (name, mode, sha) tuples
1029 Returns: Iterator over (name, mode, sha) tuples
10071030 """
10081031 return sorted_tree_items(self._entries, name_order)
10091032
10101033 def items(self):
10111034 """Return the sorted entries in this tree.
10121035
1013 :return: List with (name, mode, sha) tuples
1036 Returns: List with (name, mode, sha) tuples
10141037 """
10151038 return list(self.iteritems())
10161039
10281051 def check(self):
10291052 """Check this object for internal consistency.
10301053
1031 :raise ObjectFormatException: if the object is malformed in some way
1054 Raises:
1055 ObjectFormatException: if the object is malformed in some way
10321056 """
10331057 super(Tree, self).check()
10341058 last = None
10671091 def lookup_path(self, lookup_obj, path):
10681092 """Look up an object in a Git tree.
10691093
1070 :param lookup_obj: Callback for retrieving object by SHA1
1071 :param path: Path to lookup
1072 :return: A tuple of (mode, SHA) of the resulting path.
1094 Args:
1095 lookup_obj: Callback for retrieving object by SHA1
1096 path: Path to lookup
1097 Returns: A tuple of (mode, SHA) of the resulting path.
10731098 """
10741099 parts = path.split(b'/')
10751100 sha = self.id
10871112 def parse_timezone(text):
10881113 """Parse a timezone text fragment (e.g. '+0100').
10891114
1090 :param text: Text to parse.
1091 :return: Tuple with timezone as seconds difference to UTC
1115 Args:
1116 text: Text to parse.
1117 Returns: Tuple with timezone as seconds difference to UTC
10921118 and a boolean indicating whether this was a UTC timezone
10931119 prefixed with a negative sign (-0000).
10941120 """
11131139 def format_timezone(offset, unnecessary_negative_timezone=False):
11141140 """Format a timezone for Git serialization.
11151141
1116 :param offset: Timezone offset as seconds difference to UTC
1117 :param unnecessary_negative_timezone: Whether to use a minus sign for
1142 Args:
1143 offset: Timezone offset as seconds difference to UTC
1144 unnecessary_negative_timezone: Whether to use a minus sign for
11181145 UTC or positive timezones (-0000 and --0700 rather than +0000 / +0700).
11191146 """
11201147 if offset % 60 != 0:
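A round-trip sketch for the two timezone helpers (+0130 is 5400 seconds east of UTC):

from dulwich.objects import parse_timezone, format_timezone

offset, negative_utc = parse_timezone(b"+0130")
assert offset == 5400 and not negative_utc
assert format_timezone(offset) == b"+0130"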
11311158 def parse_time_entry(value):
11321159 """Parse time entry behavior
11331160
1134 :param value: Bytes representing a git commit/tag line
1135 :raise: ObjectFormatException in case of parsing error (malformed
1136 field date)
1137 :return: Tuple of (author, time, (timezone, timezone_neg_utc))
1161 Args:
1162 value: Bytes representing a git commit/tag line
1163 Raises:
1164 ObjectFormatException in case of parsing error (malformed
1165 field date)
1166 Returns: Tuple of (author, time, (timezone, timezone_neg_utc))
11381167 """
11391168 try:
11401169 sep = value.rindex(b'> ')
11541183 def parse_commit(chunks):
11551184 """Parse a commit object from chunks.
11561185
1157 :param chunks: Chunks to parse
1158 :return: Tuple of (tree, parents, author_info, commit_info,
1186 Args:
1187 chunks: Chunks to parse
1188 Returns: Tuple of (tree, parents, author_info, commit_info,
11591189 encoding, mergetag, gpgsig, message, extra)
11601190 """
11611191 parents = []
12331263 def check(self):
12341264 """Check this object for internal consistency.
12351265
1236 :raise ObjectFormatException: if the object is malformed in some way
1266 Raises:
1267 ObjectFormatException: if the object is malformed in some way
12371268 """
12381269 super(Commit, self).check()
12391270 self._check_has_member("_tree", "missing tree")
2929 def parse_object(repo, objectish):
3030 """Parse a string referring to an object.
3131
32 :param repo: A `Repo` object
33 :param objectish: A string referring to an object
34 :return: A git object
35 :raise KeyError: If the object can not be found
32 Args:
33 repo: A `Repo` object
34 objectish: A string referring to an object
35 Returns: A git object
36 Raises:
37 KeyError: If the object can not be found
3638 """
3739 objectish = to_bytes(objectish)
3840 return repo[objectish]
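A hedged sketch of resolving objectish strings against a repository; the path is hypothetical, and parse_commit is the sibling helper shown further below:

from dulwich.repo import Repo
from dulwich.objectspec import parse_commit, parse_object

repo = Repo("/path/to/repo")  # hypothetical path
obj = parse_object(repo, "HEAD")     # any object
commit = parse_commit(repo, "HEAD")  # must resolve to a commit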
4143 def parse_tree(repo, treeish):
4244 """Parse a string referring to a tree.
4345
44 :param repo: A `Repo` object
45 :param treeish: A string referring to a tree
46 :return: A git object
47 :raise KeyError: If the object can not be found
46 Args:
47 repo: A `Repo` object
48 treeish: A string referring to a tree
49 Returns: A git object
50 Raises:
51 KeyError: If the object can not be found
4852 """
4953 treeish = to_bytes(treeish)
5054 o = repo[treeish]
5660 def parse_ref(container, refspec):
5761 """Parse a string referring to a reference.
5862
59 :param container: A RefsContainer object
60 :param refspec: A string referring to a ref
61 :return: A ref
62 :raise KeyError: If the ref can not be found
63 Args:
64 container: A RefsContainer object
65 refspec: A string referring to a ref
66 Returns: A ref
67 Raises:
68 KeyError: If the ref can not be found
6369 """
6470 refspec = to_bytes(refspec)
6571 possible_refs = [
7985 def parse_reftuple(lh_container, rh_container, refspec):
8086 """Parse a reftuple spec.
8187
82 :param lh_container: A RefsContainer object
83 :param hh_container: A RefsContainer object
84 :param refspec: A string
85 :return: A tuple with left and right ref
86 :raise KeyError: If one of the refs can not be found
88 Args:
89 lh_container: A RefsContainer object
90 rh_container: A RefsContainer object
91 refspec: A string
92 Returns: A tuple with left and right ref
93 Raises:
94 KeyError: If one of the refs can not be found
8795 """
8896 refspec = to_bytes(refspec)
8997 if refspec.startswith(b"+"):
114122 def parse_reftuples(lh_container, rh_container, refspecs):
115123 """Parse a list of reftuple specs to a list of reftuples.
116124
117 :param lh_container: A RefsContainer object
118 :param hh_container: A RefsContainer object
119 :param refspecs: A list of refspecs or a string
120 :return: A list of refs
121 :raise KeyError: If one of the refs can not be found
125 Args:
126 lh_container: A RefsContainer object
127 rh_container: A RefsContainer object
128 refspecs: A list of refspecs or a string
129 Returns: A list of refs
130 Raises:
131 KeyError: If one of the refs can not be found
122132 """
123133 if not isinstance(refspecs, list):
124134 refspecs = [refspecs]
132142 def parse_refs(container, refspecs):
133143 """Parse a list of refspecs to a list of refs.
134144
135 :param container: A RefsContainer object
136 :param refspecs: A list of refspecs or a string
137 :return: A list of refs
138 :raise KeyError: If one of the refs can not be found
145 Args:
146 container: A RefsContainer object
147 refspecs: A list of refspecs or a string
148 Returns: A list of refs
149 Raises:
150 KeyError: If one of the refs can not be found
139151 """
140152 # TODO: Support * in refspecs
141153 if not isinstance(refspecs, list):
149161 def parse_commit_range(repo, committishs):
150162 """Parse a string referring to a range of commits.
151163
152 :param repo: A `Repo` object
153 :param committishs: A string referring to a range of commits.
154 :return: An iterator over `Commit` objects
155 :raise KeyError: When the reference commits can not be found
156 :raise ValueError: If the range can not be parsed
164 Args:
165 repo: A `Repo` object
166 committishs: A string referring to a range of commits.
167 Returns: An iterator over `Commit` objects
168 Raises:
169 KeyError: When the reference commits can not be found
170 ValueError: If the range can not be parsed
157171 """
158172 committishs = to_bytes(committishs)
159173 # TODO(jelmer): Support more than a single commit..
186200 def parse_commit(repo, committish):
187201 """Parse a string referring to a single commit.
188202
189 :param repo: A` Repo` object
190 :param commitish: A string referring to a single commit.
191 :return: A Commit object
192 :raise KeyError: When the reference commits can not be found
193 :raise ValueError: If the range can not be parsed
203 Args:
204 repo: A `Repo` object
205 committish: A string referring to a single commit.
206 Returns: A Commit object
207 Raises:
208 KeyError: When the reference commits can not be found
209 ValueError: If the range can not be parsed
194210 """
195211 committish = to_bytes(committish)
196212 try:
9999 def take_msb_bytes(read, crc32=None):
100100 """Read bytes marked with most significant bit.
101101
102 :param read: Read function
102 Args:
103 read: Read function
103104 """
104105 ret = []
105106 while len(ret) == 0 or ret[-1] & 0x80:
205206 This function requires that the buffer have additional data following the
206207 compressed data, which is guaranteed to be the case for git pack files.
207208
208 :param read_some: Read function that returns at least one byte, but may
209 Args:
210 read_some: Read function that returns at least one byte, but may
209211 return less than the requested size.
210 :param unpacked: An UnpackedObject to write result data to. If its crc32
212 unpacked: An UnpackedObject to write result data to. If its crc32
211213 attr is not None, the CRC32 of the compressed bytes will be computed
212214 using this starting CRC32.
213215 After this function, will have the following attrs set:
215217 * decomp_chunks
216218 * decomp_len
217219 * crc32
218 :param include_comp: If True, include compressed data in the result.
219 :param buffer_size: Size of the read buffer.
220 :return: Leftover unused data from the decompression.
221 :raise zlib.error: if a decompression error occurred.
220 include_comp: If True, include compressed data in the result.
221 buffer_size: Size of the read buffer.
222 Returns: Leftover unused data from the decompression.
223 Raises:
224 zlib.error: if a decompression error occurred.
222225 """
223226 if unpacked.decomp_len <= -1:
224227 raise ValueError('non-negative zlib data stream size expected')
262265 def iter_sha1(iter):
263266 """Return the hexdigest of the SHA1 over a set of names.
264267
265 :param iter: Iterator over string objects
266 :return: 40-byte hex sha1 digest
268 Args:
269 iter: Iterator over string objects
270 Returns: 40-byte hex sha1 digest
267271 """
268272 sha = sha1()
269273 for name in iter:
274278 def load_pack_index(path):
275279 """Load an index file by path.
276280
277 :param filename: Path to the index file
278 :return: A PackIndex loaded from the given path
281 Args:
282 path: Path to the index file
283 Returns: A PackIndex loaded from the given path
279284 """
280285 with GitFile(path, 'rb') as f:
281286 return load_pack_index_file(path, f)
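A hedged sketch of loading a pack index; the .idx path is hypothetical, and iterentries yields the tuples documented below:

from dulwich.pack import load_pack_index

idx = load_pack_index("/path/to/pack-0123abcd.idx")  # hypothetical path
for name, offset, crc32 in idx.iterentries():
    print(name, offset, crc32)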
306311 def load_pack_index_file(path, f):
307312 """Load an index file from a file-like object.
308313
309 :param path: Path for the index file
310 :param f: File-like object
311 :return: A PackIndex loaded from the given file
314 Args:
315 path: Path for the index file
316 f: File-like object
317 Returns: A PackIndex loaded from the given file
312318 """
313319 contents, size = _load_file_contents(f)
314320 if contents[:4] == b'\377tOc':
325331 def bisect_find_sha(start, end, sha, unpack_name):
326332 """Find a SHA in a data blob with sorted SHAs.
327333
328 :param start: Start index of range to search
329 :param end: End index of range to search
330 :param sha: Sha to find
331 :param unpack_name: Callback to retrieve SHA by index
332 :return: Index of the SHA, or None if it wasn't found
334 Args:
335 start: Start index of range to search
336 end: End index of range to search
337 sha: Sha to find
338 unpack_name: Callback to retrieve SHA by index
339 Returns: Index of the SHA, or None if it wasn't found
333340 """
334341 assert start <= end
335342 while start <= end:
375382 def iterentries(self):
376383 """Iterate over the entries in this pack index.
377384
378 :return: iterator over tuples with object name, offset in packfile and
385 Returns: iterator over tuples with object name, offset in packfile and
379386 crc32 checksum.
380387 """
381388 raise NotImplementedError(self.iterentries)
383390 def get_pack_checksum(self):
384391 """Return the SHA1 checksum stored for the corresponding packfile.
385392
386 :return: 20-byte binary digest
393 Returns: 20-byte binary digest
387394 """
388395 raise NotImplementedError(self.get_pack_checksum)
389396
417424 def _object_index(self, sha):
418425 """See object_index.
419426
420 :param sha: A *binary* SHA string. (20 characters long)_
427 Args:
428 sha: A *binary* SHA string. (20 characters long)
421429 """
422430 raise NotImplementedError(self._object_index)
423431
424432 def objects_sha1(self):
425433 """Return the hex SHA1 over all the shas of all objects in this pack.
426434
427 :note: This is used for the filename of the pack.
435 Note: This is used for the filename of the pack.
428436 """
429437 return iter_sha1(self._itersha())
430438
439447 def __init__(self, entries, pack_checksum=None):
440448 """Create a new MemoryPackIndex.
441449
442 :param entries: Sequence of name, idx, crc32 (sorted)
443 :param pack_checksum: Optional pack checksum
450 Args:
451 entries: Sequence of name, idx, crc32 (sorted)
452 pack_checksum: Optional pack checksum
444453 """
445454 self._by_sha = {}
446455 self._by_index = {}
523532 def _unpack_entry(self, i):
524533 """Unpack the i-th entry in the index file.
525534
526 :return: Tuple with object name (SHA), offset in pack file and CRC32
535 Returns: Tuple with object name (SHA), offset in pack file and CRC32
527536 checksum (if known).
528537 """
529538 raise NotImplementedError(self._unpack_entry)
548557 def iterentries(self):
549558 """Iterate over the entries in this pack index.
550559
551 :return: iterator over tuples with object name, offset in packfile and
560 Returns: iterator over tuples with object name, offset in packfile and
552561 crc32 checksum.
553562 """
554563 for i in range(len(self)):
572581 def calculate_checksum(self):
573582 """Calculate the SHA1 checksum over this pack index.
574583
575 :return: This is a 20-byte binary digest
584 Returns: 20-byte binary digest
576585 """
577586 return sha1(self._contents[:-20]).digest()
578587
579588 def get_pack_checksum(self):
580589 """Return the SHA1 checksum stored for the corresponding packfile.
581590
582 :return: 20-byte binary digest
591 Returns: 20-byte binary digest
583592 """
584593 return bytes(self._contents[-40:-20])
585594
586595 def get_stored_checksum(self):
587596 """Return the SHA1 checksum stored for this index.
588597
589 :return: 20-byte binary digest
598 Returns: 20-byte binary digest
590599 """
591600 return bytes(self._contents[-20:])
592601
593602 def _object_index(self, sha):
594603 """See object_index.
595604
596 :param sha: A *binary* SHA string. (20 characters long)_
605 Args:
606 sha: A *binary* SHA string (20 bytes long).
597607 """
598608 assert len(sha) == 20
599609 idx = ord(sha[:1])
678688 def read_pack_header(read):
679689 """Read the header of a pack file.
680690
681 :param read: Read function
682 :return: Tuple of (pack version, number of objects). If no data is
691 Args:
692 read: Read function
693 Returns: Tuple of (pack version, number of objects). If no data is
683694 available to read, returns (None, None).
684695 """
685696 header = read(12)
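For example, the 12-byte header is just the b'PACK' magic followed by two big-endian 32-bit integers, so a synthetic header round-trips like this:

    import struct
    from io import BytesIO
    header = b'PACK' + struct.pack(b'>LL', 2, 3)   # version 2, 3 objects
    assert read_pack_header(BytesIO(header).read) == (2, 3)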
705716 include_comp=False, zlib_bufsize=_ZLIB_BUFSIZE):
706717 """Unpack a Git object.
707718
708 :param read_all: Read function that blocks until the number of requested
719 Args:
720 read_all: Read function that blocks until the number of requested
709721 bytes are read.
710 :param read_some: Read function that returns at least one byte, but may not
722 read_some: Read function that returns at least one byte, but may not
711723 return the number of bytes requested.
712 :param compute_crc32: If True, compute the CRC32 of the compressed data. If
724 compute_crc32: If True, compute the CRC32 of the compressed data. If
713725 False, the returned CRC32 will be None.
714 :param include_comp: If True, include compressed data in the result.
715 :param zlib_bufsize: An optional buffer size for zlib operations.
716 :return: A tuple of (unpacked, unused), where unused is the unused data
726 include_comp: If True, include compressed data in the result.
727 zlib_bufsize: An optional buffer size for zlib operations.
728 Returns: A tuple of (unpacked, unused), where unused is the unused data
717729 leftover from decompression, and unpacked is an UnpackedObject with
718730 the following attrs set:
719731
798810 As a side effect, update the verifier's hash (excluding the last 20
799811 bytes read).
800812
801 :param read: The read callback to read from.
802 :param size: The maximum number of bytes to read; the particular
813 Args:
814 read: The read callback to read from.
815 size: The maximum number of bytes to read; the particular
803816 behavior is callback-specific.
804817 """
805818 data = read(size)
859872 def read_objects(self, compute_crc32=False):
860873 """Read the objects in this pack file.
861874
862 :param compute_crc32: If True, compute the CRC32 of the compressed
875 Args:
876 compute_crc32: If True, compute the CRC32 of the compressed
863877 data. If False, the returned CRC32 will be None.
864 :return: Iterator over UnpackedObjects with the following members set:
878 Returns: Iterator over UnpackedObjects with the following members set:
865879 offset
866880 obj_type_num
867881 obj_chunks (for non-delta types)
869883 decomp_chunks
870884 decomp_len
871885 crc32 (if compute_crc32 is True)
872 :raise ChecksumMismatch: if the checksum of the pack contents does not
886 Raises:
887 ChecksumMismatch: if the checksum of the pack contents does not
873888 match the checksum in the pack trailer.
874 :raise zlib.error: if an error occurred during zlib decompression.
875 :raise IOError: if an error occurred writing to the output file.
889 zlib.error: if an error occurred during zlib decompression.
890 IOError: if an error occurred writing to the output file.
876891 """
877892 pack_version, self._num_objects = read_pack_header(self.read)
878893 if pack_version is None:
916931 def __init__(self, read_all, read_some, outfile, delta_iter=None):
917932 """Initialize the copier.
918933
919 :param read_all: Read function that blocks until the number of
934 Args:
935 read_all: Read function that blocks until the number of
920936 requested bytes are read.
921 :param read_some: Read function that returns at least one byte, but may
937 read_some: Read function that returns at least one byte, but may
922938 not return the number of bytes requested.
923 :param outfile: File-like object to write output through.
924 :param delta_iter: Optional DeltaChainIterator to record deltas as we
939 outfile: File-like object to write output through.
940 delta_iter: Optional DeltaChainIterator to record deltas as we
925941 read them.
926942 """
927943 super(PackStreamCopier, self).__init__(read_all, read_some=read_some)
963979 def compute_file_sha(f, start_ofs=0, end_ofs=0, buffer_size=1 << 16):
964980 """Hash a portion of a file into a new SHA.
965981
966 :param f: A file-like object to read from that supports seek().
967 :param start_ofs: The offset in the file to start reading at.
968 :param end_ofs: The offset in the file to end reading at, relative to the
982 Args:
983 f: A file-like object to read from that supports seek().
984 start_ofs: The offset in the file to start reading at.
985 end_ofs: The offset in the file to end reading at, relative to the
969986 end of the file.
970 :param buffer_size: A buffer size for reading.
971 :return: A new SHA object updated with data read from the file.
987 buffer_size: A buffer size for reading.
988 Returns: A new SHA object updated with data read from the file.
972989 """
973990 sha = sha1()
974991 f.seek(0, SEEK_END)
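As a sketch, hashing everything but a 20-byte trailer (the way pack checksums are computed further down) looks like this, using an in-memory file:

    from hashlib import sha1
    from io import BytesIO
    f = BytesIO(b'x' * 100)
    assert compute_file_sha(f, end_ofs=-20).digest() == sha1(b'x' * 80).digest()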
10771094 def calculate_checksum(self):
10781095 """Calculate the checksum for this pack.
10791096
1080 :return: 20-byte binary SHA1 digest
1097 Returns: 20-byte binary SHA1 digest
10811098 """
10821099 return compute_file_sha(self._file, end_ofs=-20).digest()
10831100
11011118 def resolve_object(self, offset, type, obj, get_ref=None):
11021119 """Resolve an object, possibly resolving deltas when necessary.
11031120
1104 :return: Tuple with object type and contents.
1121 Returns: Tuple with object type and contents.
11051122 """
11061123 # Walk down the delta chain, building a stack of deltas to reach
11071124 # the requested object.
11731190 def iterentries(self, progress=None):
11741191 """Yield entries summarizing the contents of this pack.
11751192
1176 :param progress: Progress function, called with current and total
1193 Args:
1194 progress: Progress function, called with current and total
11771195 object count.
1178 :return: iterator of tuples with (sha, offset, crc32)
1196 Returns: iterator of tuples with (sha, offset, crc32)
11791197 """
11801198 num_objects = self._num_objects
11811199 resolve_ext_ref = (
11901208 def sorted_entries(self, progress=None):
11911209 """Return entries in this pack, sorted by SHA.
11921210
1193 :param progress: Progress function, called with current and total
1211 Args:
1212 progress: Progress function, called with current and total
11941213 object count
1195 :return: List of tuples with (sha, offset, crc32)
1214 Returns: List of tuples with (sha, offset, crc32)
11961215 """
11971216 ret = sorted(self.iterentries(progress=progress))
11981217 return ret
12001219 def create_index_v1(self, filename, progress=None):
12011220 """Create a version 1 file for this data file.
12021221
1203 :param filename: Index filename.
1204 :param progress: Progress report function
1205 :return: Checksum of index file
1222 Args:
1223 filename: Index filename.
1224 progress: Progress report function
1225 Returns: Checksum of index file
12061226 """
12071227 entries = self.sorted_entries(progress=progress)
12081228 with GitFile(filename, 'wb') as f:
12111231 def create_index_v2(self, filename, progress=None):
12121232 """Create a version 2 index file for this data file.
12131233
1214 :param filename: Index filename.
1215 :param progress: Progress report function
1216 :return: Checksum of index file
1234 Args:
1235 filename: Index filename.
1236 progress: Progress report function
1237 Returns: Checksum of index file
12171238 """
12181239 entries = self.sorted_entries(progress=progress)
12191240 with GitFile(filename, 'wb') as f:
12231244 version=2):
12241245 """Create an index file for this data file.
12251246
1226 :param filename: Index filename.
1227 :param progress: Progress report function
1228 :return: Checksum of index file
1247 Args:
1248 filename: Index filename.
1249 progress: Progress report function
1250 Returns: Checksum of index file
12291251 """
12301252 if version == 1:
12311253 return self.create_index_v1(filename, progress)
14801502 def pack_object_header(type_num, delta_base, size):
14811503 """Create a pack object header for the given object info.
14821504
1483 :param type_num: Numeric type of the object.
1484 :param delta_base: Delta base offset or ref, or None for whole objects.
1485 :param size: Uncompressed object size.
1486 :return: A header for a packed object.
1505 Args:
1506 type_num: Numeric type of the object.
1507 delta_base: Delta base offset or ref, or None for whole objects.
1508 size: Uncompressed object size.
1509 Returns: A header for a packed object.
14871510 """
14881511 header = []
14891512 c = (type_num << 4) | (size & 15)
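A worked example of the variable-length header encoding: for a blob (type 3) of size 10, the low 4 size bits fit in the first byte, so the header is the single byte (3 << 4) | 10 = 0x3a (a sketch, assuming the function returns a bytearray):

    hdr = pack_object_header(3, None, 10)   # 3 = blob type number
    assert bytes(hdr) == b'\x3a'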
15101533 def write_pack_object(f, type, object, sha=None):
15111534 """Write pack object to a file.
15121535
1513 :param f: File to write to
1514 :param type: Numeric type of the object
1515 :param object: Object to write
1516 :return: Tuple with offset at which the object was written, and crc32
1536 Args:
1537 f: File to write to
1538 type: Numeric type of the object
1539 object: Object to write
1540 Returns: Tuple with offset at which the object was written, and crc32
15171541 """
15181542 if type in DELTA_TYPES:
15191543 delta_base, object = object
15331557 def write_pack(filename, objects, deltify=None, delta_window_size=None):
15341558 """Write a new pack data file.
15351559
1536 :param filename: Path to the new pack file (without .pack extension)
1537 :param objects: Iterable of (object, path) tuples to write.
1560 Args:
1561 filename: Path to the new pack file (without .pack extension)
1562 objects: Iterable of (object, path) tuples to write.
15381563 Should provide __len__
1539 :param window_size: Delta window size
1540 :param deltify: Whether to deltify pack objects
1541 :return: Tuple with checksum of pack file and index file
1564 delta_window_size: Delta window size
1565 deltify: Whether to deltify pack objects
1566 Returns: Tuple with checksum of pack file and index file
15421567 """
15431568 with GitFile(filename + '.pack', 'wb') as f:
15441569 entries, data_sum = write_pack_objects(
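A hedged end-to-end sketch (the target path is hypothetical; the '.pack' and '.idx' suffixes are appended by the function itself):

    from dulwich.objects import Blob
    blob = Blob.from_string(b'hello world')
    # writes /tmp/demo.pack and /tmp/demo.idx, returns their checksums
    write_pack('/tmp/demo', [(blob, None)])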
15581583 def deltify_pack_objects(objects, window_size=None):
15591584 """Generate deltas for pack objects.
15601585
1561 :param objects: An iterable of (object, path) tuples to deltify.
1562 :param window_size: Window size; None for default
1563 :return: Iterator over type_num, object id, delta_base, content
1586 Args:
1587 objects: An iterable of (object, path) tuples to deltify.
1588 window_size: Window size; None for default
1589 Returns: Iterator over type_num, object id, delta_base, content
15641590 delta_base is None for full text entries
15651591 """
15661592 # TODO(jelmer): Use threads
15951621 def pack_objects_to_data(objects):
15961622 """Create pack data from objects
15971623
1598 :param objects: Pack objects
1599 :return: Tuples with (type_num, hexdigest, delta base, object chunks)
1624 Args:
1625 objects: Pack objects
1626 Returns: Tuples with (type_num, hexdigest, delta base, object chunks)
16001627 """
16011628 count = len(objects)
16021629 return (count,
16071634 def write_pack_objects(f, objects, delta_window_size=None, deltify=None):
16081635 """Write a new pack data file.
16091636
1610 :param f: File to write to
1611 :param objects: Iterable of (object, path) tuples to write.
1637 Args:
1638 f: File to write to
1639 objects: Iterable of (object, path) tuples to write.
16121640 Should provide __len__
1613 :param window_size: Sliding window size for searching for deltas;
1641 delta_window_size: Sliding window size for searching for deltas;
16141642 Set to None for default window size.
1615 :param deltify: Whether to deltify objects
1616 :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
1643 deltify: Whether to deltify objects
1644 Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
16171645 """
16181646 if deltify is None:
16191647 # PERFORMANCE/TODO(jelmer): This should be enabled but is *much* too
16311659 def write_pack_data(f, num_records, records, progress=None):
16321660 """Write a new pack data file.
16331661
1634 :param f: File to write to
1635 :param num_records: Number of records
1636 :param records: Iterator over type_num, object_id, delta_base, raw
1637 :param progress: Function to report progress to
1638 :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
1662 Args:
1663 f: File to write to
1664 num_records: Number of records
1665 records: Iterator over type_num, object_id, delta_base, raw
1666 progress: Function to report progress to
1667 Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
16391668 """
16401669 # Write the pack
16411670 entries = {}
16641693 def write_pack_index_v1(f, entries, pack_checksum):
16651694 """Write a new pack index file.
16661695
1667 :param f: A file-like object to write to
1668 :param entries: List of tuples with object name (sha), offset_in_pack,
1696 Args:
1697 f: A file-like object to write to
1698 entries: List of tuples with object name (sha), offset_in_pack,
16691699 and crc32_checksum.
1670 :param pack_checksum: Checksum of the pack file.
1671 :return: The SHA of the written index file
1700 pack_checksum: Checksum of the pack file.
1701 Returns: The SHA of the written index file
16721702 """
16731703 f = SHA1Writer(f)
16741704 fan_out_table = defaultdict(lambda: 0)
17221752 def create_delta(base_buf, target_buf):
17231753 """Use python difflib to work out how to transform base_buf to target_buf.
17241754
1725 :param base_buf: Base buffer
1726 :param target_buf: Target buffer
1755 Args:
1756 base_buf: Base buffer
1757 target_buf: Target buffer
17271758 """
17281759 assert isinstance(base_buf, bytes)
17291760 assert isinstance(target_buf, bytes)
17651796 def apply_delta(src_buf, delta):
17661797 """Based on the similar function in git's patch-delta.c.
17671798
1768 :param src_buf: Source buffer
1769 :param delta: Delta instructions
1799 Args:
1800 src_buf: Source buffer
1801 delta: Delta instructions
17701802 """
17711803 if not isinstance(src_buf, bytes):
17721804 src_buf = b''.join(src_buf)
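A round-trip sketch tying the two helpers together (assuming create_delta returns the delta as bytes and apply_delta returns a list of chunks, as in this version of the module):

    base = b'the quick brown fox jumps over the lazy dog'
    target = b'the quick red fox jumps over the lazy cat'
    delta = create_delta(base, target)
    assert b''.join(apply_delta(base, delta)) == target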
18321864 def write_pack_index_v2(f, entries, pack_checksum):
18331865 """Write a new pack index file.
18341866
1835 :param f: File-like object to write to
1836 :param entries: List of tuples with object name (sha), offset_in_pack, and
1867 Args:
1868 f: File-like object to write to
1869 entries: List of tuples with object name (sha), offset_in_pack, and
18371870 crc32_checksum.
1838 :param pack_checksum: Checksum of the pack file.
1839 :return: The SHA of the index file written
1871 pack_checksum: Checksum of the pack file.
1872 Returns: The SHA of the index file written
18401873 """
18411874 f = SHA1Writer(f)
18421875 f.write(b'\377tOc') # Magic!
19161949 def index(self):
19171950 """The index being used.
19181951
1919 :note: This may be an in-memory index
1952 Note: This may be an in-memory index
19201953 """
19211954 if self._idx is None:
19221955 self._idx = self._idx_load()
19601993 def check(self):
19611994 """Check the integrity of this pack.
19621995
1963 :raise ChecksumMismatch: if a checksum for the index or data is wrong
1996 Raises:
1997 ChecksumMismatch: if a checksum for the index or data is wrong
19641998 """
19651999 self.index.check()
19662000 self.data.check()
19822016 def get_raw_unresolved(self, sha1):
19832017 """Get raw unresolved data for a SHA.
19842018
1985 :param sha1: SHA to return data for
1986 :return: Tuple with pack object type, delta base (if applicable),
2019 Args:
2020 sha1: SHA to return data for
2021 Returns: Tuple with pack object type, delta base (if applicable),
19872022 list of data chunks
19882023 """
19892024 offset = self.index.object_index(sha1)
20142049 def pack_tuples(self):
20152050 """Provide an iterable for use with write_pack_objects.
20162051
2017 :return: Object that can iterate over (object, path) tuples
2052 Returns: Object that can iterate over (object, path) tuples
20182053 and provides __len__
20192054 """
20202055 class PackTupleIterable(object):
20332068 def keep(self, msg=None):
20342069 """Add a .keep file for the pack, preventing git from garbage collecting it.
20352070
2036 :param msg: A message written inside the .keep file; can be used later
2071 Args:
2072 msg: A message written inside the .keep file; can be used later
20372073 to determine whether or not a .keep file is obsolete.
2038 :return: The path of the .keep file, as a string.
2074 Returns: The path of the .keep file, as a string.
20392075 """
20402076 keepfile_name = '%s.keep' % self._basename
20412077 with GitFile(keepfile_name, 'wb') as keepfile:
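A usage sketch (pack is assumed to be an existing Pack opened from disk, and the message is hypothetical; msg must be bytes since the .keep file is opened in binary mode):

    keep_path = pack.keep(b'kept while benchmarking')
    # keep_path now names the new .keep file next to the pack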