hash.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. // Copyright (c) 2019, The Garble Authors.
  2. // See LICENSE for licensing information.
  3. package main
  4. import (
  5. "bytes"
  6. "crypto/sha256"
  7. "encoding/base64"
  8. "fmt"
  9. "go/token"
  10. "go/types"
  11. "io"
  12. "os/exec"
  13. "strings"
  14. )
  15. const buildIDSeparator = "/"
  16. // splitActionID returns the action ID half of a build ID, the first component.
  17. func splitActionID(buildID string) string {
  18. return buildID[:strings.Index(buildID, buildIDSeparator)]
  19. }
  20. // splitContentID returns the content ID half of a build ID, the last component.
  21. func splitContentID(buildID string) string {
  22. return buildID[strings.LastIndex(buildID, buildIDSeparator)+1:]
  23. }
  24. // decodeHash is the opposite of hashToString, with a panic for error handling
  25. // since it should never happen.
  26. func decodeHash(str string) []byte {
  27. h, err := base64.RawURLEncoding.DecodeString(str)
  28. if err != nil {
  29. panic(fmt.Sprintf("invalid hash %q: %v", str, err))
  30. }
  31. return h
  32. }
  33. func alterToolVersion(tool string, args []string) error {
  34. cmd := exec.Command(args[0], args[1:]...)
  35. out, err := cmd.Output()
  36. if err != nil {
  37. if err, _ := err.(*exec.ExitError); err != nil {
  38. return fmt.Errorf("%v: %s", err, err.Stderr)
  39. }
  40. return err
  41. }
  42. line := string(bytes.TrimSpace(out)) // no trailing newline
  43. f := strings.Fields(line)
  44. if len(f) < 3 || f[0] != tool || f[1] != "version" || f[2] == "devel" && !strings.HasPrefix(f[len(f)-1], "buildID=") {
  45. return fmt.Errorf("%s -V=full: unexpected output:\n\t%s", args[0], line)
  46. }
  47. var toolID []byte
  48. if f[2] == "devel" {
  49. // On the development branch, use the content ID part of the build ID.
  50. toolID = decodeHash(splitContentID(f[len(f)-1]))
  51. } else {
  52. // For a release, the output is like: "compile version go1.9.1 X:framepointer".
  53. // Use the whole line, as we can assume it's unique.
  54. toolID = []byte(line)
  55. }
  56. contentID := addGarbleToHash(toolID)
  57. // The part of the build ID that matters is the last, since it's the
  58. // "content ID" which is used to work out whether there is a need to redo
  59. // the action (build) or not. Since cmd/go parses the last word in the
  60. // output as "buildID=...", we simply add "+garble buildID=_/_/_/${hash}".
  61. // The slashes let us imitate a full binary build ID, but we assume that
  62. // the other components such as the action ID are not necessary, since the
  63. // only reader here is cmd/go and it only consumes the content ID.
  64. fmt.Printf("%s +garble buildID=_/_/_/%s\n", line, hashToString(contentID))
  65. return nil
  66. }
  67. var (
  68. hasher = sha256.New()
  69. sumBuffer [sha256.Size]byte
  70. b64SumBuffer [44]byte // base64's EncodedLen on sha256.Size (32) with no padding
  71. )
  72. // addGarbleToHash takes some arbitrary input bytes,
  73. // typically a hash such as an action ID or a content ID,
  74. // and returns a new hash which also contains garble's own deterministic inputs.
  75. //
  76. // This includes garble's own version, obtained via its own binary's content ID,
  77. // as well as any other options which affect a build, such as GOGARBLE and -tiny.
  78. func addGarbleToHash(inputHash []byte) []byte {
  79. // Join the two content IDs together into a single base64-encoded sha256
  80. // sum. This includes the original tool's content ID, and garble's own
  81. // content ID.
  82. hasher.Reset()
  83. hasher.Write(inputHash)
  84. if len(cache.BinaryContentID) == 0 {
  85. panic("missing binary content ID")
  86. }
  87. hasher.Write(cache.BinaryContentID)
  88. // We also need to add the selected options to the full version string,
  89. // because all of them result in different output. We use spaces to
  90. // separate the env vars and flags, to reduce the chances of collisions.
  91. if cache.GOGARBLE != "" {
  92. fmt.Fprintf(hasher, " GOGARBLE=%s", cache.GOGARBLE)
  93. }
  94. appendFlags(hasher, true)
  95. // addGarbleToHash returns the sum buffer, so we need a new copy.
  96. // Otherwise the next use of the global sumBuffer would conflict.
  97. sumBuffer := make([]byte, 0, sha256.Size)
  98. return hasher.Sum(sumBuffer)[:buildIDComponentLength]
  99. }
  100. // appendFlags writes garble's own flags to w in string form.
  101. // Errors are ignored, as w is always a buffer or hasher.
  102. // If forBuildHash is set, only the flags affecting a build are written.
  103. func appendFlags(w io.Writer, forBuildHash bool) {
  104. if flagLiterals {
  105. io.WriteString(w, " -literals")
  106. }
  107. if flagTiny {
  108. io.WriteString(w, " -tiny")
  109. }
  110. if flagDebug && !forBuildHash {
  111. // -debug doesn't affect the build result at all,
  112. // so don't give it separate entries in the build cache.
  113. // If the user really wants to see debug info for already built deps,
  114. // they can use "go clean cache" or the "-a" build flag to rebuild.
  115. io.WriteString(w, " -debug")
  116. }
  117. if flagDebugDir != "" && !forBuildHash {
  118. // -debugdir is a bit special.
  119. //
  120. // When passing down flags via -toolexec,
  121. // we do want the actual flag value to be kept.
  122. //
  123. // For build hashes, we can skip the flag entirely,
  124. // as it doesn't affect obfuscation at all.
  125. //
  126. // TODO: in the future, we could avoid using the -a build flag
  127. // by using "-debugdir=yes" here, and caching the obfuscated source.
  128. // Incremental builds would recover the cached source
  129. // to repopulate the output directory if it was removed.
  130. io.WriteString(w, " -debugdir=")
  131. io.WriteString(w, flagDebugDir)
  132. }
  133. if flagSeed.present() {
  134. io.WriteString(w, " -seed=")
  135. io.WriteString(w, flagSeed.String())
  136. }
  137. }
  138. // buildIDComponentLength is the number of bytes each build ID component takes,
  139. // such as an action ID or a content ID.
  140. const buildIDComponentLength = 15
  141. // hashToString encodes the first 120 bits of a sha256 sum in base64, the same
  142. // format used for components in a build ID.
  143. func hashToString(h []byte) string {
  144. return base64.RawURLEncoding.EncodeToString(h[:buildIDComponentLength])
  145. }
  146. func buildidOf(path string) (string, error) {
  147. cmd := exec.Command("go", "tool", "buildid", path)
  148. out, err := cmd.Output()
  149. if err != nil {
  150. if err, _ := err.(*exec.ExitError); err != nil {
  151. return "", fmt.Errorf("%v: %s", err, err.Stderr)
  152. }
  153. return "", err
  154. }
  155. return string(out), nil
  156. }
  157. var (
  158. // Hashed names are base64-encoded.
  159. // Go names can only be letters, numbers, and underscores.
  160. // This means we can use base64's URL encoding, minus '-'.
  161. // Use the URL encoding, replacing '-' with a duplicate 'z'.
  162. // Such a lossy encoding is fine, since we never decode hashes.
  163. nameCharset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_z"
  164. nameBase64 = base64.NewEncoding(nameCharset)
  165. )
  166. // These funcs mimic the unicode package API, but byte-based since we know
  167. // base64 is all ASCII.
  168. func isDigit(b byte) bool { return '0' <= b && b <= '9' }
  169. func isLower(b byte) bool { return 'a' <= b && b <= 'z' }
  170. func isUpper(b byte) bool { return 'A' <= b && b <= 'Z' }
  171. func toLower(b byte) byte { return b + ('a' - 'A') }
  172. func toUpper(b byte) byte { return b - ('a' - 'A') }
  173. func hashWithPackage(pkg *listedPackage, name string) string {
  174. if !flagSeed.present() {
  175. return hashWithCustomSalt(pkg.GarbleActionID, name)
  176. }
  177. // Use a separator at the end of ImportPath as a salt,
  178. // to ensure that "pkgfoo.bar" and "pkg.foobar" don't both hash
  179. // as the same string "pkgfoobar".
  180. return hashWithCustomSalt([]byte(pkg.ImportPath+"|"), name)
  181. }
  182. func hashWithStruct(strct *types.Struct, fieldName string) string {
  183. // TODO: We should probably strip field tags here.
  184. // Do we need to do anything else to make a
  185. // struct type "canonical"?
  186. fieldsSalt := []byte(strct.String())
  187. if !flagSeed.present() {
  188. fieldsSalt = addGarbleToHash(fieldsSalt)
  189. }
  190. return hashWithCustomSalt(fieldsSalt, fieldName)
  191. }
  192. // hashWithCustomSalt returns a hashed version of name,
  193. // including the provided salt as well as opts.Seed into the hash input.
  194. //
  195. // The result is always four bytes long. If the input was a valid identifier,
  196. // the output remains equally exported or unexported. Note that this process is
  197. // reproducible, but not reversible.
  198. func hashWithCustomSalt(salt []byte, name string) string {
  199. if len(salt) == 0 {
  200. panic("hashWithCustomSalt: empty salt")
  201. }
  202. if name == "" {
  203. panic("hashWithCustomSalt: empty name")
  204. }
  205. // hashLength is the number of base64 characters to use for the final
  206. // hashed name.
  207. // This needs to be long enough to realistically avoid hash collisions,
  208. // but short enough to not bloat binary sizes.
  209. // The namespace for collisions is generally a single package, since
  210. // that's where most hashed names are namespaced to.
  211. // Using a "hash collision" formula, and taking a generous estimate of a
  212. // package having 10k names, we get the following probabilities.
  213. // Most packages will have far fewer names, but some packages are huge,
  214. // especially generated ones.
  215. // We also have slightly fewer bits in practice, since the base64
  216. // charset has 'z' twice, and the first base64 char is coerced into a
  217. // valid Go identifier. So we must be conservative.
  218. // Remember that base64 stores 6 bits per encoded byte.
  219. // The probability numbers are approximated.
  220. //
  221. // length (base64) | length (bits) | collision probability
  222. // -------------------------------------------------------
  223. // 4 24 ~95%
  224. // 5 30 ~4%
  225. // 6 36 ~0.07%
  226. // 7 42 ~0.001%
  227. // 8 48 ~0.00001%
  228. //
  229. // We want collisions to be practically impossible, so we choose 8 to
  230. // end up with a chance of about 1 in a million even when a package has
  231. // thousands of obfuscated names.
  232. const hashLength = 8
  233. hasher.Reset()
  234. hasher.Write(salt)
  235. hasher.Write(flagSeed.bytes)
  236. io.WriteString(hasher, name)
  237. nameBase64.Encode(b64SumBuffer[:], hasher.Sum(sumBuffer[:0]))
  238. b64Name := b64SumBuffer[:hashLength]
  239. // Even if we are hashing a package path, we still want the result to be
  240. // a valid identifier, since we'll use it as the package name too.
  241. if isDigit(b64Name[0]) {
  242. // Turn "3foo" into "Dfoo".
  243. // Similar to toLower, since uppercase letters go after digits
  244. // in the ASCII table.
  245. b64Name[0] += 'A' - '0'
  246. }
  247. // Keep the result equally exported or not, if it was an identifier.
  248. if !token.IsIdentifier(name) {
  249. return string(b64Name)
  250. }
  251. if token.IsExported(name) {
  252. if b64Name[0] == '_' {
  253. // Turn "_foo" into "Zfoo".
  254. b64Name[0] = 'Z'
  255. } else if isLower(b64Name[0]) {
  256. // Turn "afoo" into "Afoo".
  257. b64Name[0] = toUpper(b64Name[0])
  258. }
  259. } else {
  260. if isUpper(b64Name[0]) {
  261. // Turn "Afoo" into "afoo".
  262. b64Name[0] = toLower(b64Name[0])
  263. }
  264. }
  265. return string(b64Name)
  266. }