Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 47 additions & 13 deletions tensorboard/plugins/projector/projector_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,23 @@ def _parse_positive_int_param(request, param_name):


def _rel_to_abs_asset_path(fpath, config_fpath):
fpath = os.path.expanduser(fpath)
if not os.path.isabs(fpath):
return os.path.join(os.path.dirname(config_fpath), fpath)
return fpath
config_dir = os.path.realpath(
os.path.dirname(os.path.expanduser(config_fpath))
)
candidate = os.path.expanduser(fpath)
if not os.path.isabs(candidate):
candidate = os.path.join(config_dir, candidate)
candidate = os.path.realpath(candidate)
error_message = 'Asset path "%s" resolves outside the config directory' % (
fpath
)
try:
common_path = os.path.commonpath([config_dir, candidate])
except ValueError as e:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there other possible ValueErrors that are expected besides the one we're raising? This seems a bit odd to me. Let's just raise the error with the proper error message directly?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated this.

I changed it so we only catch ValueError from os.path.commonpath(...) itself and otherwise raise the final error message directly when the resolved path is outside the config directory.

Thanks!

raise ValueError(error_message) from e
if common_path != config_dir:
raise ValueError(error_message)
return candidate


def _using_tf():
Expand Down Expand Up @@ -363,9 +376,18 @@ def _augment_configs_with_checkpoint_info(self):
embedding.tensor_name = embedding.tensor_name[:-2]
# Find the size of embeddings associated with a tensors file.
if embedding.tensor_path:
fpath = _rel_to_abs_asset_path(
embedding.tensor_path, self.config_fpaths[run]
)
try:
fpath = _rel_to_abs_asset_path(
embedding.tensor_path, self.config_fpaths[run]
)
except ValueError as e:
logger.warning(
'Skipping tensor path "%s" for run "%s": %s',
embedding.tensor_path,
run,
e,
)
continue
tensor = self.tensor_cache.get((run, embedding.tensor_name))
if tensor is None:
try:
Expand Down Expand Up @@ -594,7 +616,10 @@ def _serve_metadata(self, request):
"text/plain",
400,
)
fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
try:
fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
except ValueError as e:
return Respond(request, str(e), "text/plain", 400)
if not tf.io.gfile.exists(fpath) or tf.io.gfile.isdir(fpath):
return Respond(
request,
Expand Down Expand Up @@ -651,9 +676,12 @@ def _serve_tensor(self, request):
embedding = self._get_embedding(name, config)

if embedding and embedding.tensor_path:
fpath = _rel_to_abs_asset_path(
embedding.tensor_path, self.config_fpaths[run]
)
try:
fpath = _rel_to_abs_asset_path(
embedding.tensor_path, self.config_fpaths[run]
)
except ValueError as e:
return Respond(request, str(e), "text/plain", 400)
if not tf.io.gfile.exists(fpath):
return Respond(
request,
Expand Down Expand Up @@ -720,7 +748,10 @@ def _serve_bookmarks(self, request):
"text/plain",
400,
)
fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
try:
fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
except ValueError as e:
return Respond(request, str(e), "text/plain", 400)
if not tf.io.gfile.exists(fpath) or tf.io.gfile.isdir(fpath):
return Respond(
request,
Expand Down Expand Up @@ -766,7 +797,10 @@ def _serve_sprite_image(self, request):
)

fpath = os.path.expanduser(embedding_info.sprite.image_path)
fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
try:
fpath = _rel_to_abs_asset_path(fpath, self.config_fpaths[run])
except ValueError as e:
return Respond(request, str(e), "text/plain", 400)
if not tf.io.gfile.exists(fpath) or tf.io.gfile.isdir(fpath):
return Respond(
request,
Expand Down
135 changes: 134 additions & 1 deletion tensorboard/plugins/projector/projector_plugin_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@ def __init__(self, *args, **kwargs):
self.server = None

def setUp(self):
self.log_dir = self.get_temp_dir()
self.test_dir = self.get_temp_dir()
self.log_dir = os.path.join(self.test_dir, "log_dir")
self.restricted_dir = os.path.join(self.test_dir, "restricted_dir")
tf.io.gfile.makedirs(self.log_dir)
tf.io.gfile.makedirs(self.restricted_dir)

def testRunsWithValidCheckpoint(self):
self._GenerateProjectorTestData()
Expand Down Expand Up @@ -197,6 +201,93 @@ def testBookmarks(self):
bookmark = self._GetJson(url)
self.assertEqual(bookmark, {"a": "b"})

def testMetadataServesRelativeFileWithinLogdir(self):
self._GenerateProjectorAssetsTestData(metadata_path="metadata.tsv")
self._WriteTextFile(
os.path.join(self.log_dir, "metadata.tsv"), "label\nvalue\n"
)
self._SetupWSGIApp()

response = self._Get(
"/data/plugin/projector/metadata?run=.&name=embedding"
)
self.assertEqual(response.status_code, 200)
self.assertEqual(response.data, b"label\nvalue\n")

def testMetadataRejectsTraversalOutsideLogdir(self):
outside_metadata_path = os.path.join(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests might be clearer and less error-prone if we use hard-coded, readable values, instead of relying on logic, e.g. /home/my_username/log_dir/ and /home/my_username/restricted_dir/

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated this.

I rewrote the setup to use explicit log_dir and restricted_dir directories and hard-coded readable traversal paths like ../restricted_dir/outside_metadata.tsv, so the boundary being tested is clearer.

Thanks!

self.restricted_dir, "outside_metadata.tsv"
)
traversal_path = "../restricted_dir/outside_metadata.tsv"
self._WriteTextFile(outside_metadata_path, "secret\n")
self._GenerateProjectorAssetsTestData(metadata_path=traversal_path)
self._SetupWSGIApp()

response = self._Get(
"/data/plugin/projector/metadata?run=.&name=embedding"
)
self._AssertOutsideConfigDirResponse(response)

def testMetadataRejectsSymlinkOutsideLogdir(self):
outside_metadata_path = os.path.join(
self.restricted_dir, "outside_metadata.tsv"
)
symlink_path = os.path.join(self.log_dir, "metadata-link.tsv")
self._WriteTextFile(outside_metadata_path, "secret\n")
try:
os.symlink(outside_metadata_path, symlink_path)
except (AttributeError, NotImplementedError, OSError) as e:
self.skipTest("symlinks unavailable: %s" % e)
self._GenerateProjectorAssetsTestData(metadata_path="metadata-link.tsv")
self._SetupWSGIApp()

response = self._Get(
"/data/plugin/projector/metadata?run=.&name=embedding"
)
self._AssertOutsideConfigDirResponse(response)

def testTensorRejectsAbsolutePathOutsideLogdir(self):
outside_tensor_path = os.path.join(
self.restricted_dir, "outside_tensor.tsv"
)
self._WriteTextFile(outside_tensor_path, "1.0\t2.0\n")
self._GenerateProjectorAssetsTestData(tensor_path=outside_tensor_path)
self._SetupWSGIApp()

response = self._Get(
"/data/plugin/projector/tensor?run=.&name=embedding"
)
self._AssertOutsideConfigDirResponse(response)

def testBookmarksRejectAbsolutePathOutsideLogdir(self):
outside_bookmarks_path = os.path.join(
self.restricted_dir, "outside_bookmarks.json"
)
self._WriteTextFile(outside_bookmarks_path, '{"label": "secret"}')
self._GenerateProjectorAssetsTestData(
bookmarks_path=outside_bookmarks_path
)
self._SetupWSGIApp()

response = self._Get(
"/data/plugin/projector/bookmarks?run=.&name=embedding"
)
self._AssertOutsideConfigDirResponse(response)

def testSpriteImageRejectsTraversalOutsideLogdir(self):
outside_sprite_path = os.path.join(
self.restricted_dir, "outside_sprite.png"
)
traversal_path = "../restricted_dir/outside_sprite.png"
self._WriteTextFile(outside_sprite_path, "not-an-image")
self._GenerateProjectorAssetsTestData(sprite_image_path=traversal_path)
self._SetupWSGIApp()

response = self._Get(
"/data/plugin/projector/sprite_image?run=.&name=embedding"
)
self._AssertOutsideConfigDirResponse(response)

def testEndpointsNoAssets(self):
g = tf.Graph()

Expand All @@ -213,6 +304,10 @@ def _AssertTensorResponse(self, tensor_bytes, expected_tensor):
)
self.assertTrue(np.array_equal(tensor, expected_tensor))

def _AssertOutsideConfigDirResponse(self, response):
self.assertEqual(response.status_code, 400)
self.assertIn(b"resolves outside the config directory", response.data)

# TODO(#2007): Cleanly separate out projector tests that require real TF
@unittest.skipUnless(USING_REAL_TF, "Test only passes when using real TF")
def testPluginIsActive(self):
Expand Down Expand Up @@ -336,6 +431,44 @@ def _GenerateProjectorTestData(self):
)
saver.save(sess, checkpoint_path)

def _GenerateProjectorAssetsTestData(
self,
tensor_path="tensor.tsv",
metadata_path=None,
bookmarks_path=None,
sprite_image_path=None,
):
self._WriteTextFile(self._ResolveAssetPath(tensor_path), "1.0\t2.0\n")

config = projector_config_pb2.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = "embedding"
embedding.tensor_path = tensor_path
if metadata_path is not None:
embedding.metadata_path = metadata_path
if bookmarks_path is not None:
embedding.bookmarks_path = bookmarks_path
if sprite_image_path is not None:
embedding.sprite.image_path = sprite_image_path

with tf.io.gfile.GFile(
os.path.join(self.log_dir, "projector_config.pbtxt"), "w"
) as f:
f.write(text_format.MessageToString(config))

def _ResolveAssetPath(self, path):
path = os.path.expanduser(path)
if os.path.isabs(path):
return os.path.realpath(path)
return os.path.realpath(os.path.join(self.log_dir, path))

def _WriteTextFile(self, path, contents):
parent = os.path.dirname(path)
if parent:
tf.io.gfile.makedirs(parent)
with tf.io.gfile.GFile(path, "w") as f:
f.write(contents)


class MetadataColumnsTest(tf.test.TestCase):
def testLengthDoesNotMatch(self):
Expand Down
Loading