TokenParser.php 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. <?php
  2. namespace Doctrine\Common\Annotations;
  3. use function array_merge;
  4. use function count;
  5. use function explode;
  6. use function strtolower;
  7. use function token_get_all;
  8. use const PHP_VERSION_ID;
  9. use const T_AS;
  10. use const T_COMMENT;
  11. use const T_DOC_COMMENT;
  12. use const T_NAME_FULLY_QUALIFIED;
  13. use const T_NAME_QUALIFIED;
  14. use const T_NAMESPACE;
  15. use const T_NS_SEPARATOR;
  16. use const T_STRING;
  17. use const T_USE;
  18. use const T_WHITESPACE;
  19. /**
  20. * Parses a file for namespaces/use/class declarations.
  21. */
  class TokenParser
  {
      /**
       * The token list, as returned by token_get_all() for the given contents.
       *
       * @phpstan-var list<mixed[]>
       */
      private $tokens;

      /**
       * The number of tokens.
       *
       * @var int
       */
      private $numTokens;

      /**
       * The current array pointer: index of the next token that next() will inspect.
       *
       * @var int
       */
      private $pointer = 0;

      /**
       * Tokenizes the given PHP source so it can be scanned for declarations.
       *
       * @param string $contents
       */
      public function __construct($contents)
      {
          $this->tokens = token_get_all($contents);

          // The PHP parser sets internal compiler globals for certain things. Annoyingly, the last docblock comment it
          // saw gets stored in doc_comment. When it comes to compile the next thing to be include()d this stored
          // doc_comment becomes owned by the first thing the compiler sees in the file that it considers might have a
          // docblock. If the first thing in the file is a class without a doc block this would cause calls to
          // getDocBlock() on said class to return our long lost doc_comment. Argh.
          // To workaround, cause the parser to parse an empty docblock. Sure getDocBlock() will return this, but at least
          // it's harmless to us.
          token_get_all("<?php\n/**\n *\n */");

          $this->numTokens = count($this->tokens);
      }

      /**
       * Gets the next non whitespace and non comment token, advancing the internal pointer past it.
       *
       * @param bool $docCommentIsComment If TRUE then a doc comment is considered a comment and skipped.
       *                                  If FALSE then only whitespace and normal comments are skipped.
       *
       * @return mixed[]|string|null The token if exists, null otherwise.
       */
      public function next($docCommentIsComment = true)
      {
          while ($this->pointer < $this->numTokens) {
              $token = $this->tokens[$this->pointer++];

              // For single-character string tokens (';', '{', ...) this is the character itself,
              // which never strictly equals one of the integer T_* constants below.
              $type = $token[0];

              if ($type === T_WHITESPACE || $type === T_COMMENT) {
                  continue;
              }

              if ($docCommentIsComment && $type === T_DOC_COMMENT) {
                  continue;
              }

              return $token;
          }

          return null;
      }

      /**
       * Parses a single use statement.
       *
       * Must be called right after the T_USE token has been consumed. Handles comma separated
       * imports, explicit "as" aliases and group use declarations ("use Foo\{Bar, Baz};").
       * Parsing stops at the terminating ';' or at the first token that cannot be part of a
       * class import (for example the "function"/"const" keyword or the '(' of a closure's use clause).
       *
       * @return array<string, string> A list with all found class names for a use statement,
       *                               keyed by the lower-cased alias.
       */
      public function parseUseStatement()
      {
          $groupRoot     = '';
          $class         = '';
          $alias         = '';
          $statements    = [];
          $explicitAlias = false;

          while (($token = $this->next())) {
              if (! $explicitAlias && $token[0] === T_STRING) {
                  // Name segment: extends the class name and is the implicit alias so far.
                  $class .= $token[1];
                  $alias  = $token[1];
              } elseif ($explicitAlias && $token[0] === T_STRING) {
                  // Identifier following "as".
                  $alias = $token[1];
              } elseif (
                  PHP_VERSION_ID >= 80000 &&
                  ($token[0] === T_NAME_QUALIFIED || $token[0] === T_NAME_FULLY_QUALIFIED)
              ) {
                  // PHP 8 emits a whole qualified name as one token; the alias is its last segment.
                  // The version check must stay first: the T_NAME_* constants do not exist before PHP 8.
                  $class .= $token[1];

                  $classSplit = explode('\\', $token[1]);
                  $alias      = $classSplit[count($classSplit) - 1];
              } elseif ($token[0] === T_NS_SEPARATOR) {
                  $class .= '\\';
                  $alias  = '';
              } elseif ($token[0] === T_AS) {
                  $explicitAlias = true;
                  $alias         = '';
              } elseif ($token === ',') {
                  // Only record an import if a class name was actually collected.
                  if ($class !== '') {
                      $statements[strtolower($alias)] = $groupRoot . $class;
                  }

                  $class         = '';
                  $alias         = '';
                  $explicitAlias = false;
              } elseif ($token === ';') {
                  // After a trailing comma in a group use ("use Foo\{Bar, Baz,};") nothing is pending
                  // here; recording it would add a bogus '' => 'Foo\' entry.
                  if ($class !== '') {
                      $statements[strtolower($alias)] = $groupRoot . $class;
                  }

                  break;
              } elseif ($token === '{') {
                  // Start of a group use: everything collected so far is the common prefix.
                  $groupRoot = $class;
                  $class     = '';
              } elseif ($token === '}') {
                  continue;
              } else {
                  break;
              }
          }

          return $statements;
      }

      /**
       * Gets all use statements.
       *
       * Scans the remaining tokens, collecting every use statement found. Each time a namespace
       * declaration matching $namespaceName is encountered, the statements collected so far are discarded.
       *
       * @param string $namespaceName The namespace name of the reflected class.
       *
       * @return array<string, string> A list with all found use statements.
       */
      public function parseUseStatements($namespaceName)
      {
          $statements = [];

          while (($token = $this->next())) {
              if ($token[0] === T_USE) {
                  $statements = array_merge($statements, $this->parseUseStatement());
                  continue;
              }

              if ($token[0] !== T_NAMESPACE || $this->parseNamespace() !== $namespaceName) {
                  continue;
              }

              // Get fresh array for new namespace. This is to prevent the parser to collect the use statements
              // for a previous namespace with the same name. This is the case if a namespace is defined twice
              // or if a namespace with the same name is commented out.
              $statements = [];
          }

          return $statements;
      }

      /**
       * Gets the namespace.
       *
       * Concatenates consecutive name tokens starting at the current position. The first token
       * that is not part of the name is consumed as well.
       *
       * @return string The found namespace.
       */
      public function parseNamespace()
      {
          $name = '';

          while (
              ($token = $this->next()) && ($token[0] === T_STRING || $token[0] === T_NS_SEPARATOR || (
                  PHP_VERSION_ID >= 80000 &&
                  ($token[0] === T_NAME_QUALIFIED || $token[0] === T_NAME_FULLY_QUALIFIED)
              ))
          ) {
              $name .= $token[1];
          }

          return $name;
      }

      /**
       * Gets the class name.
       *
       * @return string The found class name.
       */
      public function parseClass()
      {
          // Namespaces and class names are tokenized the same: T_STRINGs
          // separated by T_NS_SEPARATOR so we can use one function to provide
          // both.
          return $this->parseNamespace();
      }
  }