randomtext.py 22 KB


  1. #!/bin/env python
  2. #Copyright ReportLab Europe Ltd. 2000-2017
  3. #see license.txt for license details
  4. #history https://hg.reportlab.com/hg-public/reportlab/log/tip/src/reportlab/lib/randomtext.py
  5. __version__='3.3.0'
  6. ###############################################################################
  7. # generates so-called 'Greek Text' for use in filling documents.
  8. ###############################################################################
  9. __doc__="""Like Lorem Ipsum, but more fun and extensible.
  10. This module exposes a function randomText() which generates paragraphs.
  11. These can be used when testing out document templates and stylesheets.
  12. A number of 'themes' are provided - please contribute more!
  13. We need some real Greek text too.
  14. There are currently eight themes provided:
  15. STARTUP (words suitable for a business plan - or not as the case may be),
  16. COMPUTERS (names of programming languages and operating systems etc),
  17. BLAH (variations on the word 'blah'),
  18. BUZZWORD (buzzword bingo),
  19. STARTREK (Star Trek),
  20. PRINTING (print-related terms)
  21. PYTHON (snippets and quotes from Monty Python)
  22. CHOMSKY (random linguistic nonsense)
  23. EXAMPLE USAGE:
  24. from reportlab.lib import randomtext
  25. print(randomtext.randomText(randomtext.PYTHON, 10))
  26. This prints a random number of random sentences (up to a limit
  27. of ten) using the theme 'PYTHON'.
  28. """
  29. #theme one :-)
  30. STARTUP = ['strategic', 'direction', 'proactive', 'venture capital',
  31. 'reengineering', 'forecast', 'resources', 'SWOT analysis',
  32. 'forward-thinking', 'profit', 'growth', 'doubletalk', 'B2B', 'B2C',
  33. 'venture capital', 'IPO', "NASDAQ meltdown - we're all doomed!"]
  34. #theme two - computery things.
  35. COMPUTERS = ['Python', 'Perl', 'Pascal', 'Java', 'Javascript',
  36. 'VB', 'Basic', 'LISP', 'Fortran', 'ADA', 'APL', 'C', 'C++',
  37. 'assembler', 'Larry Wall', 'Guido van Rossum', 'XML', 'HTML',
  38. 'cgi', 'cgi-bin', 'Amiga', 'Macintosh', 'Dell', 'Microsoft',
  39. 'firewall', 'server', 'Linux', 'Unix', 'MacOS', 'BeOS', 'AS/400',
  40. 'sendmail', 'TCP/IP', 'SMTP', 'RFC822-compliant', 'dynamic',
  41. 'Internet', 'A/UX', 'Amiga OS', 'BIOS', 'boot managers', 'CP/M',
  42. 'DOS', 'file system', 'FreeBSD', 'Freeware', 'GEOS', 'GNU',
  43. 'Hurd', 'Linux', 'Mach', 'Macintosh OS', 'mailing lists', 'Minix',
  44. 'Multics', 'NetWare', 'NextStep', 'OS/2', 'Plan 9', 'Realtime',
  45. 'UNIX', 'VMS', 'Windows', 'X Windows', 'Xinu', 'security', 'Intel',
  46. 'encryption', 'PGP' , 'software', 'ActiveX', 'AppleScript', 'awk',
  47. 'BETA', 'COBOL', 'Delphi', 'Dylan', 'Eiffel', 'extreme programming',
  48. 'Forth', 'Fortran', 'functional languages', 'Guile', 'format your hard drive',
  49. 'Icon', 'IDL', 'Infer', 'Intercal', 'J', 'Java', 'JavaScript', 'CD-ROM',
  50. 'JCL', 'Lisp', '"literate programming"', 'Logo', 'MUMPS', 'C: drive',
  51. 'Modula-2', 'Modula-3', 'Oberon', 'Occam', 'OpenGL', 'parallel languages',
  52. 'Pascal', 'Perl', 'PL/I', 'PostScript', 'Prolog', 'hardware', 'Blue Screen of Death',
  53. 'Rexx', 'RPG', 'Scheme', 'scripting languages', 'Smalltalk', 'crash!', 'disc crash',
  54. 'Spanner', 'SQL', 'Tcl/Tk', 'TeX', 'TOM', 'Visual', 'Visual Basic', '4GL',
  55. 'VRML', 'Virtual Reality Modeling Language', 'difference engine', '...went into "yo-yo mode"',
  56. 'Sun', 'Sun Microsystems', 'Hewlett Packard', 'output device',
  57. 'CPU', 'memory', 'registers', 'monitor', 'TFT display', 'plasma screen',
  58. 'bug report', '"mis-feature"', '...millions of bugs!', 'pizza',
  59. '"illiterate programming"','...lots of pizza!', 'pepperoni pizza',
  60. 'coffee', 'Jolt Cola[TM]', 'beer', 'BEER!']
  61. #theme three - 'blah' - for when you want to be subtle. :-)
  62. BLAH = ['Blah', 'BLAH', 'blahblah', 'blahblahblah', 'blah-blah',
  63. 'blah!', '"Blah Blah Blah"', 'blah-de-blah', 'blah?', 'blah!!!',
  64. 'blah...', 'Blah.', 'blah;', 'blah, Blah, BLAH!', 'Blah!!!']
  65. #theme four - 'buzzword bingo' time!
  66. BUZZWORD = ['intellectual capital', 'market segment', 'flattening',
  67. 'regroup', 'platform', 'client-based', 'long-term', 'proactive',
  68. 'quality vector', 'out of the loop', 'implement',
  69. 'streamline', 'cost-centered', 'phase', 'synergy',
  70. 'synergize', 'interactive', 'facilitate',
  71. 'appropriate', 'goal-setting', 'empowering', 'low-risk high-yield',
  72. 'peel the onion', 'goal', 'downsize', 'result-driven',
  73. 'conceptualize', 'multidisciplinary', 'gap analysis', 'dysfunctional',
  74. 'networking', 'knowledge management', 'goal-setting',
  75. 'mastery learning', 'communication', 'real-estate', 'quarterly',
  76. 'scalable', 'Total Quality Management', 'best of breed',
  77. 'nimble', 'monetize', 'benchmark', 'hardball',
  78. 'client-centered', 'vision statement', 'empowerment',
  79. 'lean & mean', 'credibility', 'synergistic',
  80. 'backward-compatible', 'hardball', 'stretch the envelope',
  81. 'bleeding edge', 'networking', 'motivation', 'best practice',
  82. 'best of breed', 'implementation', 'Total Quality Management',
  83. 'undefined', 'disintermediate', 'mindset', 'architect',
  84. 'gap analysis', 'morale', 'objective', 'projection',
  85. 'contribution', 'proactive', 'go the extra mile', 'dynamic',
  86. 'world class', 'real estate', 'quality vector', 'credibility',
  87. 'appropriate', 'platform', 'projection', 'mastery learning',
  88. 'recognition', 'quality', 'scenario', 'performance based',
  89. 'solutioning', 'go the extra mile', 'downsize', 'phase',
  90. 'networking', 'experiencing slippage', 'knowledge management',
  91. 'high priority', 'process', 'ethical', 'value-added', 'implement',
  92. 're-factoring', 're-branding', 'embracing change']
  93. #theme five - Star Trek
  94. STARTREK = ['Starfleet', 'Klingon', 'Romulan', 'Cardassian', 'Vulcan',
  95. 'Benzite', 'IKV Pagh', 'emergency transponder', 'United Federation of Planets',
  96. 'Bolian', "K'Vort Class Bird-of-Prey", 'USS Enterprise', 'USS Intrepid',
  97. 'USS Reliant', 'USS Voyager', 'Starfleet Academy', 'Captain Picard',
  98. 'Captain Janeway', 'Tom Paris', 'Harry Kim', 'Counsellor Troi',
  99. 'Lieutenant Worf', 'Lieutenant Commander Data', 'Dr. Beverly Crusher',
  100. 'Admiral Nakamura', 'Irumodic Syndrome', 'Devron system', 'Admiral Pressman',
  101. 'asteroid field', 'sensor readings', 'Binars', 'distress signal', 'shuttlecraft',
  102. 'cloaking device', 'shuttle bay 2', 'Dr. Pulaski', 'Lwaxana Troi', 'Pacifica',
  103. 'William Riker', "Chief O'Brian", 'Soyuz class science vessel', 'Wolf-359',
  104. 'Galaxy class vessel', 'Utopia Planitia yards', 'photon torpedo', 'Archer IV',
  105. 'quantum flux', 'spacedock', 'Risa', 'Deep Space Nine', 'blood wine',
  106. 'quantum torpedoes', 'holodeck', 'Romulan Warbird', 'Betazoid', 'turbolift', 'battle bridge',
  107. 'Memory Alpha', '...with a phaser!', 'Romulan ale', 'Ferrengi', 'Klingon opera',
  108. 'Quark', 'wormhole', 'Bajoran', 'cruiser', 'warship', 'battlecruiser', '"Intruder alert!"',
  109. 'scout ship', 'science vessel', '"Borg Invasion imminent!" ', '"Abandon ship!"',
  110. 'Red Alert!', 'warp-core breech', '"All hands abandon ship! This is not a drill!"']
  111. #theme six - print-related terms
  112. PRINTING = ['points', 'picas', 'leading', 'kerning', 'CMYK', 'offset litho',
  113. 'type', 'font family', 'typography', 'type designer',
  114. 'baseline', 'white-out type', 'WOB', 'bicameral', 'bitmap',
  115. 'blockletter', 'bleed', 'margin', 'body', 'widow', 'orphan',
  116. 'cicero', 'cursive', 'letterform', 'sidehead', 'dingbat', 'leader',
  117. 'DPI', 'drop-cap', 'paragraph', 'En', 'Em', 'flush left', 'left justified',
  118. 'right justified', 'centered', 'italic', 'Latin letterform', 'ligature',
  119. 'uppercase', 'lowercase', 'serif', 'sans-serif', 'weight', 'type foundry',
  120. 'fleuron', 'folio', 'gutter', 'whitespace', 'humanist letterform', 'caption',
  121. 'page', 'frame', 'ragged setting', 'flush-right', 'rule', 'drop shadows',
  122. 'prepress', 'spot-colour', 'duotones', 'colour separations', 'four-colour printing',
  123. 'Pantone[TM]', 'service bureau', 'imagesetter']
  124. #it had to be done!...
  125. #theme seven - the "full Monty"!
  126. PYTHON = ['Good evening ladies and Bruces','I want to buy some cheese', 'You do have some cheese, do you?',
  127. "Of course sir, it's a cheese shop sir, we've got...",'discipline?... naked? ... With a melon!?',
  128. 'The Church Police!!' , "There's a dead bishop on the landing", 'Would you like a twist of lemming sir?',
  129. '"Conquistador Coffee brings a new meaning to the word vomit"','Your lupins please',
  130. 'Crelm Toothpaste, with the miracle ingredient Fraudulin',
  131. "Well there's the first result and the Silly Party has held Leicester.",
  132. 'Hello, I would like to buy a fish license please', "Look, it's people like you what cause unrest!",
  133. "When we got home, our Dad would thrash us to sleep with his belt!", 'Luxury', "Gumby Brain Specialist",
  134. "My brain hurts!!!", "My brain hurts too.", "How not to be seen",
  135. "In this picture there are 47 people. None of them can be seen",
  136. "Mrs Smegma, will you stand up please?",
  137. "Mr. Nesbitt has learned the first lesson of 'Not Being Seen', not to stand up.",
  138. "My hovercraft is full of eels", "Ah. You have beautiful thighs.", "My nipples explode with delight",
  139. "Drop your panties Sir William, I cannot wait 'til lunchtime",
  140. "I'm a completely self-taught idiot.", "I always wanted to be a lumberjack!!!",
  141. "Told you so!! Oh, coitus!!", "",
  142. "Nudge nudge?", "Know what I mean!", "Nudge nudge, nudge nudge?", "Say no more!!",
  143. "Hello, well it's just after 8 o'clock, and time for the penguin on top of your television set to explode",
  144. "Oh, intercourse the penguin!!", "Funny that penguin being there, isn't it?",
  145. "I wish to register a complaint.", "Now that's what I call a dead parrot", "Pining for the fjords???",
  146. "No, that's not dead, it's ,uhhhh, resting", "This is an ex-parrot!!",
  147. "That parrot is definitely deceased.", "No, no, no - it's spelt Raymond Luxury Yach-t, but it's pronounced 'Throatwobbler Mangrove'.",
  148. "You're a very silly man and I'm not going to interview you.", "No Mungo... never kill a customer."
  149. "And I'd like to conclude by putting my finger up my nose",
  150. "egg and Spam", "egg bacon and Spam", "egg bacon sausage and Spam", "Spam bacon sausage and Spam",
  151. "Spam egg Spam Spam bacon and Spam", "Spam sausage Spam Spam Spam bacon Spam tomato and Spam",
  152. "Spam Spam Spam egg and Spam", "Spam Spam Spam Spam Spam Spam baked beans Spam Spam Spam",
  153. "Spam!!", "I don't like Spam!!!", "You can't have egg, bacon, Spam and sausage without the Spam!",
  154. "I'll have your Spam. I Love it!",
  155. "I'm having Spam Spam Spam Spam Spam Spam Spam baked beans Spam Spam Spam and Spam",
  156. "Have you got anything without Spam?", "There's Spam egg sausage and Spam, that's not got much Spam in it.",
  157. "No one expects the Spanish Inquisition!!", "Our weapon is surprise, surprise and fear!",
  158. "Get the comfy chair!", "Amongst our weaponry are such diverse elements as: fear, surprise, ruthless efficiency, an almost fanatical devotion to the Pope, and nice red uniforms - Oh damn!",
  159. "Nobody expects the... Oh bugger!", "What swims in the sea and gets caught in nets? Henri Bergson?",
  160. "Goats. Underwater goats with snorkels and flippers?", "A buffalo with an aqualung?",
  161. "Dinsdale was a looney, but he was a happy looney.", "Dinsdale!!",
  162. "The 127th Upper-Class Twit of the Year Show", "What a great Twit!",
  163. "thought by many to be this year's outstanding twit",
  164. "...and there's a big crowd here today to see these prize idiots in action.",
  165. "And now for something completely different.", "Stop that, it's silly",
  166. "We interrupt this program to annoy you and make things generally irritating",
  167. "This depraved and degrading spectacle is going to stop right now, do you hear me?",
  168. "Stop right there!", "This is absolutely disgusting and I'm not going to stand for it",
  169. "I object to all this sex on the television. I mean, I keep falling off",
  170. "Right! Stop that, it's silly. Very silly indeed", "Very silly indeed", "Lemon curry?",
  171. "And now for something completely different, a man with 3 buttocks",
  172. "I've heard of unisex, but I've never had it", "That's the end, stop the program! Stop it!"]
  173. leadins=[
  174. "To characterize a linguistic level L,",
  175. "On the other hand,",
  176. "This suggests that",
  177. "It appears that",
  178. "Furthermore,",
  179. "We will bring evidence in favor of the following thesis: ",
  180. "To provide a constituent structure for T(Z,K),",
  181. "From C1, it follows that",
  182. "For any transformation which is sufficiently diversified in application to be of any interest,",
  183. "Analogously,",
  184. "Clearly,",
  185. "Note that",
  186. "Of course,",
  187. "Suppose, for instance, that",
  188. "Thus",
  189. "With this clarification,",
  190. "Conversely,",
  191. "We have already seen that",
  192. "By combining adjunctions and certain deformations,",
  193. "I suggested that these results would follow from the assumption that",
  194. "If the position of the trace in (99c) were only relatively inaccessible to movement,",
  195. "However, this assumption is not correct, since",
  196. "Comparing these examples with their parasitic gap counterparts in (96) and (97), we see that",
  197. "In the discussion of resumptive pronouns following (81),",
  198. "So far,",
  199. "Nevertheless,",
  200. "For one thing,",
  201. "Summarizing, then, we assume that",
  202. "A consequence of the approach just outlined is that",
  203. "Presumably,",
  204. "On our assumptions,",
  205. "It may be, then, that",
  206. "It must be emphasized, once again, that",
  207. "Let us continue to suppose that",
  208. "Notice, incidentally, that",
  209. "A majority of informed linguistic specialists agree that",
  210. "There is also a different approach to the [unification] problem,",
  211. "This approach divorces the cognitive sciences from a biological setting,",
  212. "The approach relies on the \"Turing Test,\" devised by mathematician Alan Turing,",
  213. "Adopting this approach,",
  214. "There is no fact, no meaningful question to be answered,",
  215. "Another superficial similarity is the interest in simulation of behavior,",
  216. "A lot of sophistication has been developed about the utilization of machines for complex purposes,",
  217. ]
  218. subjects = [
  219. "the notion of level of grammaticalness",
  220. "a case of semigrammaticalness of a different sort",
  221. "most of the methodological work in modern linguistics",
  222. "a subset of English sentences interesting on quite independent grounds",
  223. "the natural general principle that will subsume this case",
  224. "an important property of these three types of EC",
  225. "any associated supporting element",
  226. "the appearance of parasitic gaps in domains relatively inaccessible to ordinary extraction",
  227. "the speaker-hearer's linguistic intuition",
  228. "the descriptive power of the base component",
  229. "the earlier discussion of deviance",
  230. "this analysis of a formative as a pair of sets of features",
  231. "this selectionally introduced contextual feature",
  232. "a descriptively adequate grammar",
  233. "the fundamental error of regarding functional notions as categorial",
  234. "relational information",
  235. "the systematic use of complex symbols",
  236. "the theory of syntactic features developed earlier",
  237. ]
  238. verbs= [
  239. "can be defined in such a way as to impose",
  240. "delimits",
  241. "suffices to account for",
  242. "cannot be arbitrary in",
  243. "is not subject to",
  244. "does not readily tolerate",
  245. "raises serious doubts about",
  246. "is not quite equivalent to",
  247. "does not affect the structure of",
  248. "may remedy and, at the same time, eliminate",
  249. "is not to be considered in determining",
  250. "is to be regarded as",
  251. "is unspecified with respect to",
  252. "is, apparently, determined by",
  253. "is necessary to impose an interpretation on",
  254. "appears to correlate rather closely with",
  255. "is rather different from",
  256. ]
  257. objects = [
  258. "problems of phonemic and morphological analysis.",
  259. "a corpus of utterance tokens upon which conformity has been defined by the paired utterance test.",
  260. "the traditional practice of grammarians.",
  261. "the levels of acceptability from fairly high (e.g. (99a)) to virtual gibberish (e.g. (98d)).",
  262. "a stipulation to place the constructions into these various categories.",
  263. "a descriptive fact.",
  264. "a parasitic gap construction.",
  265. "the extended c-command discussed in connection with (34).",
  266. "the ultimate standard that determines the accuracy of any proposed grammar.",
  267. "the system of base rules exclusive of the lexicon.",
  268. "irrelevant intervening contexts in selectional rules.",
  269. "nondistinctness in the sense of distinctive feature theory.",
  270. "a general convention regarding the forms of the grammar.",
  271. "an abstract underlying order.",
  272. "an important distinction in language use.",
  273. "the requirement that branching is not tolerated within the dominance scope of a complex symbol.",
  274. "the strong generative capacity of the theory.",
  275. ]
  276. def format_wisdom(text,line_length=72):
  277. try:
  278. import textwrap
  279. return textwrap.fill(text, line_length)
  280. except:
  281. return text
  282. def chomsky(times = 1):
  283. if not isinstance(times, int):
  284. return format_wisdom(__doc__)
  285. import random
  286. prevparts = []
  287. newparts = []
  288. output = []
  289. for i in range(times):
  290. for partlist in (leadins, subjects, verbs, objects):
  291. while 1:
  292. part = random.choice(partlist)
  293. if part not in prevparts:
  294. break
  295. newparts.append(part)
  296. output.append(' '.join(newparts))
  297. prevparts = newparts
  298. newparts = []
  299. return format_wisdom(' '.join(output))
from reportlab import rl_config
# When rl_config.invariant is set, replace the randrange/choice
# implementations used by the random module with the float-based versions
# below, and seed the shared generator with a fixed value.
# NOTE(review): presumably this keeps generated text reproducible across
# Python versions whose stdlib randrange/choice algorithms differ -- confirm.
if rl_config.invariant:
    import random
    #monkey patch random.randrange
    # The subclass exists only as a container for the two replacement
    # methods; they are copied onto random.Random below and the class is then
    # deleted.
    class RLMonkeyPatchRandom(random.Random):
        def randrange(self, start, stop=None, step=1, _int=int, _maxwidth=1<<random.BPF):
            """Choose a random item from range(start, stop[, step]).

            This fixes the problem with randint() which includes the
            endpoint; in Python this is usually not what you want.

            Raises ValueError for non-integer arguments, a zero step, or an
            empty range.  Widths below _maxwidth are served from
            self.random(); larger ones go through self._randbelow().
            """
            # This code is a bit messy to make it fast for the
            # common case while still doing adequate error checking.
            istart = _int(start)
            if istart != start:
                raise ValueError("non-integer arg 1 for randrange()")
            if stop is None:
                # Single-argument form: pick from range(istart).
                if istart > 0:
                    if istart >= _maxwidth:
                        return self._randbelow(istart)
                    return _int(self.random() * istart)
                raise ValueError("empty range for randrange()")

            # stop argument supplied.
            istop = _int(stop)
            if istop != stop:
                raise ValueError("non-integer stop for randrange()")
            width = istop - istart
            if step == 1 and width > 0:
                # Note that
                #     int(istart + self.random()*width)
                # instead would be incorrect.  For example, consider istart
                # = -2 and istop = 0.  Then the guts would be in
                # -2.0 to 0.0 exclusive on both ends (ignoring that random()
                # might return 0.0), and because int() truncates toward 0, the
                # final result would be -1 or 0 (instead of -2 or -1).
                #     istart + int(self.random()*width)
                # would also be incorrect, for a subtler reason:  the RHS
                # can return a long, and then randrange() would also return
                # a long, but we're supposed to return an int (for backward
                # compatibility).
                if width >= _maxwidth:
                    return _int(istart + self._randbelow(width))
                return _int(istart + _int(self.random()*width))
            if step == 1:
                # Unit step but width <= 0.
                raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width))

            # Non-unit step argument supplied.
            istep = _int(step)
            if istep != step:
                raise ValueError("non-integer step for randrange()")
            # n is the number of values in range(istart, istop, istep).
            if istep > 0:
                n = (width + istep - 1) // istep
            elif istep < 0:
                n = (width + istep + 1) // istep
            else:
                raise ValueError("zero step for randrange()")

            if n <= 0:
                raise ValueError("empty range for randrange()")

            if n >= _maxwidth:
                return istart + istep*self._randbelow(n)
            return istart + istep*_int(self.random() * n)

        def choice(self, seq):
            """Choose a random element from a non-empty sequence."""
            # Index is derived from a single self.random() draw.
            return seq[int(self.random() * len(seq))]

    # Install the replacements on the base class so every Random instance
    # (including random._inst) uses them.
    random.Random.randrange = RLMonkeyPatchRandom.randrange
    random.Random.choice = RLMonkeyPatchRandom.choice
    # The module-level functions are bound methods captured when random was
    # imported, so they are rebound from random._inst to pick up the patched
    # methods.
    # NOTE(review): random.randint is not rebound here; it is expected to
    # reach the patched randrange through self.randrange -- verify for the
    # Python versions in use.
    random.randrange = random._inst.randrange
    random.choice = random._inst.choice
    del RLMonkeyPatchRandom
    # rl_config._random acts as a flag: the fixed seed is applied only while
    # the flag is unset/falsy, and the flag is then set to 1.
    if not getattr(rl_config,'_random',None):
        rl_config._random = 1
        random.seed(2342471922)
    # Keep the module namespace clean; functions below import random locally.
    del random
del rl_config
  372. def randomText(theme=STARTUP, sentences=5):
  373. #this may or may not be appropriate in your company
  374. if type(theme)==type(''):
  375. if theme.lower()=='chomsky': return chomsky(sentences)
  376. elif theme.upper() in ('STARTUP','COMPUTERS','BLAH','BUZZWORD','STARTREK','PRINTING','PYTHON'):
  377. theme = globals()[theme.upper()]
  378. else:
  379. raise ValueError('Unknown theme "%s"' % theme)
  380. from random import randint, choice
  381. RANDOMWORDS = theme
  382. #sentences = 5
  383. output = ""
  384. for sentenceno in range(randint(1,sentences)):
  385. output = output + 'Blah'
  386. for wordno in range(randint(10,25)):
  387. if randint(0,4)==0:
  388. word = choice(RANDOMWORDS)
  389. else:
  390. word = 'blah'
  391. output = output + ' ' +word
  392. output = output+'. '
  393. return output
  394. if __name__=='__main__':
  395. import sys
  396. argv = sys.argv[1:]
  397. if argv:
  398. theme = argv.pop(0)
  399. if argv:
  400. sentences = int(argv.pop(0))
  401. else:
  402. sentences = 5
  403. try:
  404. print(randomText(theme,sentences))
  405. except:
  406. sys.stderr.write("Usage: randomtext.py [theme [#sentences]]\n")
  407. sys.stderr.write(" theme in chomsky|STARTUP|COMPUTERS|BLAH|BUZZWORD|STARTREK|PRINTING|PYTHON\n")
  408. raise
  409. else:
  410. print(chomsky(5))