4343from tensorflow_docs .tools .nblint .decorator import Options
4444
4545
def search_wordlist(wordlist, src_str):
  """Search for wordlist entries in text and return the entries found.

  Matching is case-insensitive and restricted to whole words. An occurrence
  that is directly preceded or followed by the path separator '/' is ignored,
  so words that only appear as URL or file path components are not reported.

  Args:
    wordlist: Dict of word entries and recommendations to search in string.
    src_str: String to search for word entries.

  Returns:
    A dict that is a subset of entries from `wordlist` found in `src_str`.
    Entries whose recommendation is falsy (e.g. `False`) map to "n/a".
  """
  found_words = {}
  for word, alt_word in wordlist.items():
    # Zero-width lookarounds are used instead of consuming `[^/]` classes so
    # that a word at the very start or end of `src_str` still matches.
    # `re.escape` keeps entries literal even if they contain regex
    # metacharacters.
    pattern = rf"(?<!/)\b{re.escape(word)}\b(?!/)"
    if re.search(pattern, src_str, re.IGNORECASE):
      found_words[word] = alt_word or "n/a"
  return found_words
65+
66+
4667# Acceptable copyright heading for notebooks following this style.
4768copyrights_re = [
4869 r"Copyright 20[1-9][0-9] The TensorFlow\s.*?\s?Authors" ,
@@ -63,7 +84,8 @@ def copyright_check(args):
6384@lint (
6485 message = "Apache license cell is required" ,
6586 scope = Options .Scope .CODE ,
66- cond = Options .Cond .ANY )
87+ cond = Options .Cond .ANY ,
88+ )
6789def license_check (args ):
6890 if license_re .search (args ["cell_source" ]):
6991 return True
@@ -105,8 +127,10 @@ def get_arg_or_fail(user_args, arg_name, arg_fmt):
105127 return user_args .get (arg_name )
106128 else :
107129 fail (
108- f"Requires user-argument '{ arg_name } ': nblint --arg={ arg_name } :{ arg_fmt } ..." ,
109- always_show = True )
130+ f"Requires user-argument '{ arg_name } ': nblint"
131+ f" --arg={ arg_name } :{ arg_fmt } ..." ,
132+ always_show = True ,
133+ )
110134
111135
112136def split_doc_path (filepath ):
@@ -136,8 +160,8 @@ def split_doc_path(filepath):
136160 def split_path_on_dir (fp , dirname , offset = 1 ):
137161 parts = fp .parts
138162 idx = parts .index (dirname )
139- docs_dir = pathlib .Path (* parts [idx : idx + offset ])
140- rel_path = fp .relative_to (* parts [:idx + offset ])
163+ docs_dir = pathlib .Path (* parts [idx : idx + offset ])
164+ rel_path = fp .relative_to (* parts [: idx + offset ])
141165 return docs_dir , rel_path
142166
143167 if "site" in fp_full .parts :
@@ -159,7 +183,8 @@ def split_path_on_dir(fp, dirname, offset=1):
159183@lint (
160184 message = "Missing or malformed URL in Colab button." ,
161185 scope = Options .Scope .TEXT ,
162- cond = Options .Cond .ANY )
186+ cond = Options .Cond .ANY ,
187+ )
163188def button_colab (args ):
164189 """Test that the URL in the Colab button matches the file path."""
165190 cell_source = args ["cell_source" ]
@@ -180,13 +205,15 @@ def button_colab(args):
180205 fail (
181206 f"Colab button URL doesn't match: { this_url } " ,
182207 fix = fix .regex_between_groups_replace_all ,
183- fix_args = [r"(href.*)http.*?(\\\".*colab_logo_32px.png)" , this_url ])
208+ fix_args = [r"(href.*)http.*?(\\\".*colab_logo_32px.png)" , this_url ],
209+ )
184210
185211
186212@lint (
187213 message = "Missing or malformed URL in Download button." ,
188214 scope = Options .Scope .TEXT ,
189- cond = Options .Cond .ANY )
215+ cond = Options .Cond .ANY ,
216+ )
190217def button_download (args ):
191218 """Test that the URL in the Download button matches the file path."""
192219 cell_source = args ["cell_source" ]
@@ -203,21 +230,24 @@ def button_download(args):
203230
204231 this_url = urllib .parse .urljoin (
205232 "https://storage.googleapis.com" ,
206- str (f"tensorflow_docs/{ repo_name } " / docs_dir / rel_path ))
233+ str (f"tensorflow_docs/{ repo_name } " / docs_dir / rel_path ),
234+ )
207235
208236 if is_button_cell_re .search (cell_source ) and cell_source .find (this_url ) != - 1 :
209237 return True
210238 else :
211239 fail (
212240 f"Download button URL doesn't match: { this_url } " ,
213241 fix = fix .regex_between_groups_replace_all ,
214- fix_args = [r"(href.*)http.*?(\\\".*download_logo_32px.png)" , this_url ])
242+ fix_args = [r"(href.*)http.*?(\\\".*download_logo_32px.png)" , this_url ],
243+ )
215244
216245
217246@lint (
218247 message = "Missing or malformed URL in GitHub button." ,
219248 scope = Options .Scope .TEXT ,
220- cond = Options .Cond .ANY )
249+ cond = Options .Cond .ANY ,
250+ )
221251def button_github (args ):
222252 """Test that the URL in the GitHub button matches the file path."""
223253 cell_source = args ["cell_source" ]
@@ -238,13 +268,15 @@ def button_github(args):
238268 fail (
239269 f"GitHub button URL doesn't match: { this_url } " ,
240270 fix = fix .regex_between_groups_replace_all ,
241- fix_args = [r"(href.*)http.*?(\\\".*GitHub-Mark-32px.png)" , this_url ])
271+ fix_args = [r"(href.*)http.*?(\\\".*GitHub-Mark-32px.png)" , this_url ],
272+ )
242273
243274
244275@lint (
245276 message = "Missing or malformed URL in 'View on' button." ,
246277 scope = Options .Scope .TEXT ,
247- cond = Options .Cond .ANY )
278+ cond = Options .Cond .ANY ,
279+ )
248280def button_website (args ):
249281 """Test that the website URL in the 'View on' button matches the file path.
250282
@@ -289,7 +321,8 @@ def button_website(args):
289321@lint (
290322 message = "Missing or malformed URL in 'TFHub' button." ,
291323 scope = Options .Scope .TEXT ,
292- cond = Options .Cond .ANY )
324+ cond = Options .Cond .ANY ,
325+ )
293326def button_hub (args ):
294327 """Notebooks that mention tfhub.dev should have a TFHub button."""
295328 cell_source = args ["cell_source" ]
@@ -312,7 +345,8 @@ def button_hub(args):
312345@lint (
313346 message = "Remove extra buttons from TF 1.x docs." ,
314347 scope = Options .Scope .TEXT ,
315- cond = Options .Cond .ALL )
348+ cond = Options .Cond .ALL ,
349+ )
316350def button_r1_extra (args ):
317351 """The r1/ docs should not have website or download buttons."""
318352 cell_source = args ["cell_source" ]
@@ -332,10 +366,66 @@ def button_r1_extra(args):
332366 base_url = "https://www.tensorflow.org/"
333367
334368 # Look for button URLs that shouldn't be there..
335- if (re .search (f"{ base_url } /(?!images)" , cell_source ) or
336- cell_source .find (download_url ) != - 1 ):
369+ if (
370+ re .search (f"{ base_url } /(?!images)" , cell_source )
371+ or cell_source .find (download_url ) != - 1
372+ ):
373+ fail (
374+ "Remove the 'View on' and 'Download notebook' buttons since r1/ docs"
375+ " are not published."
376+ )
377+ else :
378+ return True
379+
380+
# Non-exhaustive list of first-person words: {word: alt-word}.
# Use False if no alternative is provided; `search_wordlist` then reports the
# recommendation as "n/a".
_SECOND_PERSON_WORDLIST = {"we": "you", "we're": "you are"}
383+
384+
@lint(
    message=(
        "Prefer second person instead of first person:"
        " https://developers.google.com/style/person"
    ),
    cond=Options.Cond.ALL,
)
def second_person(args):
  """Flag first-person wording in a cell and suggest second-person wording.

  Args:
    args: Lint arguments; only `args["cell_source"]` (the cell text) is read.

  Returns:
    True when no entry of `_SECOND_PERSON_WORDLIST` occurs in the cell.
    Otherwise `fail` is called with the matched words and their suggested
    replacements.
  """
  source = args["cell_source"]
  matches = search_wordlist(_SECOND_PERSON_WORDLIST, source)
  if not matches:
    return True

  # Render each hit as "word => suggestion" for the failure message.
  words = ", ".join(f"{word} => {alt}" for word, alt in matches.items())
  fail(
      f"Prefer second person instead of first person. Found: {words} in"
      f" {source}"
  )
403+
404+
# Non-exhaustive list of non-inclusive words: {word: alt-word}.
# Use False if no alternative is provided; `search_wordlist` then reports the
# recommendation as "n/a".
_INCLUSIVE_WORDLIST = {
    "blacklist": "blocked",
    "whitelist": "allowed",
    "master": "primary",
    "slave": "replica",
}
412+
413+
@lint(
    message=(
        "Use inclusive language:"
        " https://developers.google.com/style/inclusive-documentation"
    ),
    cond=Options.Cond.ALL,
)
def inclusive_language(args):
  """Flag non-inclusive wording in a cell and suggest alternatives.

  Args:
    args: Lint arguments; only `args["cell_source"]` (the cell text) is read.

  Returns:
    True when no entry of `_INCLUSIVE_WORDLIST` occurs in the cell. Otherwise
    `fail` is called with the matched words and their suggested replacements.
  """
  source = args["cell_source"]
  matches = search_wordlist(_INCLUSIVE_WORDLIST, source)
  if not matches:
    return True

  # Render each hit as "word => suggestion" for the failure message.
  words = ", ".join(f"{word} => {alt}" for word, alt in matches.items())
  fail(
      f"Use inclusive language where possible and accurate. Found: {words} in"
      f" {source}"
  )
0 commit comments